library(dplyr)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(ggplot2)
library(forcats)
library(here)
here() starts at /Users/avrilwang/Desktop/Project-Plasmodium
library(deSolve)
library(crone)
library(optimParallel)
Loading required package: parallel
library(doParallel)
Loading required package: foreach
Loading required package: iterators
library(doRNG)
Loading required package: rngtools
library(arrow)
Attaching package: ‘arrow’
The following object is masked from ‘package:utils’:
timestamp
library(stringr)
library(parallel)
library(ggpubr)
Notebook for plotting all of the figures for PloS Biology manuscript submission
Guidelines: taken from https://journals.plos.org/plosbiology/s/figures#loc-figure-file-requirements 1. format: eps 2. max file size: 10 MB 3. text size: Arial, Times, or Symbol font only in 8-12 point 2. figure size: Width: 789 – 2250 pixels (at 300 dpi). Height maximum: 2625 pixels (at 300 dpi).
#=========================================# figure 1: best single and co-infection cue #=========================================# Figure displaying the reaction norms of best single and co-infection.
#——- optimal cue reaction norm ———–# # read data
# single infection dynamics, reaction norms, and rugs
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet"))
si_rn.df <- read_parquet(here("data/si_dyn/si_rn.parquet"))
si_rug.df <- read_parquet(here("data/si_dyn/si_rug.parquet"))
# co-infection dynamics, reaction norms, and rugs
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))
ci_rn.df <- read_parquet(here("data/ci_dyn/ci_rn.parquet"))
ci_rug.df <- read_parquet(here("data/ci_dyn/ci_rug.parquet"))
process data for reaction norm
# import labelling scheme
ez_label <- read.csv(here("data/ez_label.csv"))
# get si_label with ci cue range
si_ci_rug.df <- ci_rug.df %>%
mutate(label_si = case_when(
label %in% c("I", "I1+I2") ~ "I",
label %in% c("I log","I1+I2 log") ~ "I log",
label %in% c("Ig", "Ig1+Ig2") ~ "Ig",
label %in% c("Ig log") ~ "Ig log",
label %in% c("sum", "I+Ig") ~ "I+Ig",
label %in% c("sum log", "I+Ig log") ~ "I+Ig log",
label == "R" ~ "R",
label == "R log" ~ "R log",
label %in% c("G", "G1+G2") ~ "G",
label == "G log" ~ "G log"
))
# get limit for si_rug
si_rug_lim.df <- si_rug.df %>%
filter(time <= 20) %>%
group_by(label)%>%
summarise(min = min(value, na.rm = T)*0.9,
max = max(value, na.rm = T)*1.1) %>%
select(label_si = label, min_si = min, max_si = max)
# filter to restriction conversion rate reaction norm range to cue ranges that appear in rug
## change to Inf/-inf to NA. Note that I am first joining with si rug lim to check which limit is larger, We will go with the cue range that has the largest span
ci_rug_lim.df <- si_ci_rug.df %>%
group_by(label) %>%
mutate(min = min(value, na.rm = T)*0.9,
max = max(value, na.rm = T)*1.1) %>%
distinct(label, .keep_all = T) %>%
select(label, label_si, min, max)
rug_lim.final <- ci_rug_lim.df %>% left_join(si_rug_lim.df, by = "label_si") %>%
mutate(final_min = min(min, min_si),
final_max = max(max, max_si))
# get second rug_lim.final for single infection
rug_lim.final2 <- rug_lim.final %>%
group_by(label_si) %>%
mutate(final_min = min(final_min, na.rm = T),
final_max = max(final_max, na.rm = T))
# filter ci_rn by limit
ci_rn.df2 <- ci_rn.df %>%
left_join(rug_lim.final, by = "label") %>%
group_by(label) %>%
filter(cue_range <= final_max & cue_range >= final_min)
match single infection rn with coinfection
# get ci label to si rug and filter by limit
si_rn.df2 <- left_join(si_rn.df, select(ez_label, label_si, label_ci), by = c("label" = "label_si")) %>%
left_join(rug_lim.final, by = c("label_ci" = "label")) %>%
group_by(label_ci) %>%
filter(cue_range <= final_max & cue_range >= final_min) %>%
select(cue_range, cr, label_ci, label_si)
# get ci label to si rug
si_rug.df2 <- select(si_rug.df, value, label_si = label)
plot reaction norm
# join with ezlabel
ci_rn.df3 <- ci_rn.df2 %>% left_join(ez_label, by = c("label" = "label_ci"))
si_rn.df3 <- si_rn.df2 %>% left_join(ez_label, by = "label_ci")
ci_rug.df3 <- ci_rug.df %>% left_join(ez_label, by = c("label" = "label_ci"))
si_rug.df3 <- si_rug.df2 %>% left_join(ez_label, by = "label_si")
# redo order of cues
ci_rn.df3$ez_label <- factor(ci_rn.df3$ez_label,
levels = c("Asexual iRBC", "Asexual iRBC log10",
"Total asexual iRBC", "Total asexual\niRBC log10",
"Sexual iRBC", "Sexual iRBC log10",
"Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
"Total iRBC", "Total iRBC log10",
"Gametocyte", "Gametocyte log10",
"Total gametocyte", "Total sexual iRBC",
"RBC", "RBC log10"))
si_rn.df3$ez_label <- factor(si_rn.df3$ez_label,
levels = c("Asexual iRBC", "Asexual iRBC log10",
"Total asexual iRBC", "Total asexual\niRBC log10",
"Sexual iRBC", "Sexual iRBC log10",
"Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
"Total iRBC", "Total iRBC log10",
"Gametocyte", "Gametocyte log10",
"Total gametocyte", "Total sexual iRBC",
"RBC", "RBC log10"))
ci_rug.df3$ez_label <- factor(ci_rug.df3$ez_label,
levels = c("Asexual iRBC", "Asexual iRBC log10",
"Total asexual iRBC", "Total asexual\niRBC log10",
"Sexual iRBC", "Sexual iRBC log10",
"Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
"Total iRBC", "Total iRBC log10",
"Gametocyte", "Gametocyte log10",
"Total gametocyte", "Total sexual iRBC",
"RBC", "RBC log10"))
si_rug.df3$ez_label <- factor(si_rug.df3$ez_label,
levels = c("Asexual iRBC", "Asexual iRBC log10",
"Total asexual iRBC", "Total asexual\niRBC log10",
"Sexual iRBC", "Sexual iRBC log10",
"Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
"Total iRBC", "Total iRBC log10",
"Gametocyte", "Gametocyte log10",
"Total gametocyte", "Total sexual iRBC",
"RBC", "RBC log10"))
# plot
opt_cue_pl.B <- ggplot() +
geom_line(data = ci_rn.df3, aes(x = cue_range, y = cr, color = "Co-infection")) +
geom_point(data = ci_rn.df3 %>%
group_by(label) %>%
mutate(ten_th = round(n()/10)) %>%
filter(row_number() %% ten_th == 0), aes(x = cue_range, y = cr, color = "Co-infection", shape = "Co-infection"), size = 2) +
geom_line(data = si_rn.df3, aes(x = cue_range, y = cr, color = "Single infection")) +
geom_point(data = si_rn.df3 %>%
group_by(label_ci) %>%
mutate(ten_th = round(n()/10)) %>%
filter(row_number() %% ten_th == 0), aes(x = cue_range, y = cr, color = "Single infection", shape = "Single infection"), size = 2) +
geom_rug(data = ci_rug.df3, aes(x = value), color = "#4575b4", sides = "t", length = unit(0.1, "npc")) +
geom_rug(data = si_rug.df3, aes(x = value), color = "#fc8d59", sides = "b", length = unit(0.1, "npc")) +
facet_wrap(~ez_label, scales = "free_x", ncol = 2) +
ylim(-0.3, 1.3) +
theme_bw() +
labs(y = "Conversion rate", x = "Cue range", color = "Model", shape = "Model") +
scale_x_continuous(labels = function(x) format(x, scientific = T),
guide = guide_axis(check.overlap = TRUE)) +
theme(axis.text.x = element_text(size = 7),
legend.position = "bottom") +
scale_color_manual(values=c( "#4575b4", "#fc8d59")) +
theme(strip.text.x = element_text(margin = margin(b = 0.5, t = 0.5)))
#———- plot best fitness x vs y plot ————–# # import in data
# single infection dynamics
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet"))
# co-infection dynamics
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))
ez_label <- read.csv(here("data/ez_label.csv"))
process for final 20 days fitness
# get single infection maximum tau_cum for 20 days
si_fitness.df <- si_dyn.df %>%
filter(variable == "tau_cum" & time == 20)
# get co-infection maximum tau_cum for 20 days
ci_fitness.df <- ci_dyn.df %>%
filter(variable == "tau_cum1" & time == 20)
plot
si_opt.df
Error: object 'si_opt.df' not found
plot reaction norm and fitness value together
ggarrange(opt_cue_pl.A, opt_cue_pl.B, widths = c(0.6, 0.4), align = "h", labels = c("A", "B"))
Warning: Graphs cannot be horizontally aligned unless the axis parameter is set. Placing graphs unaligned.
ggsave(units = "px", dpi = 300, width = 2000, height = 1300, filename = here("figures/plos-bio/rn_fitness.tiff"), bg = "white", scale = 2.1)

#===========================================================# # Demographic stochasticity #===========================================================# #———- plot heat map—————# # import in all fitness files
file_ls <- list.files(path = here("data/MC_partitioned/"), pattern = "*.csv", full.names = T)
name_ls <- list.files(path = here("data/MC_partitioned/"), pattern = "*.csv")
name_ls <- gsub("*.csv", "", name_ls)
# 60, which is about right
length(file_ls)
# read in files
fitness.ls <- lapply(file_ls, read.csv)
# assign unique ID
fitness.ls <- mapply(cbind, fitness.ls, "ID" = name_ls, SIMPLIFY = F)
process data
# get metainfo from ID
fitness.ls2 <- mclapply(fitness.ls, function(x){
id_col <- x$ID
# string split to extract all info
cue <- unlist(str_split(unique(id_col), pattern = "_"))[[3]]
log <- unlist(str_split(unique(id_col), pattern = "_"))[[4]]
rand_var <- unlist(str_split(unique(id_col), pattern = "_"))[[5]]
# get mean
mean_fitness <- mean(x$max_fitness)
# get sd
sd_fitness <- sd(x$max_fitness)
# bind results
res <- cbind(x, cue= cue, log = log, rand_var = rand_var, mean_fitness = mean_fitness, sd_fitness = sd_fitness)
return(res)
})
Get reference data
reference_ls <- list.files(path = here("data/MC2"), pattern = "*.csv", full.names = T)
reference_name.ls <- gsub("*.csv", "", list.files(path = here("data/MC2/"), pattern = "*.csv"))
# read in the files
reference.ls <- lapply(reference_ls, read.csv)
# assign unique ID
reference.ls <- mapply(cbind, reference.ls, "ID" = reference_name.ls, SIMPLIFY = F)
# get meta data
reference.ls2 <- mclapply(reference.ls, function(x){
id_col <- x$ID
# string split to extract all info
cue <- unlist(str_split(unique(id_col), pattern = "_"))[[2]]
# get log
third_col <- unlist(str_split(unique(id_col), pattern = "_"))[[3]]
log <- ifelse(third_col == "log", "log10", "none")
# get mean
mean_fitness <- mean(x$max_fitness)
# get sd
sd_fitness <- sd(x$max_fitness)
# bind results
res <- cbind(x, cue= cue, log = log, rand_var = "all", ref_mean_fitness = mean_fitness, ref_sd_fitness = sd_fitness)
return(res)
})
combine MC partitioned and reference df
# get unique column values for each cue, log, and rand_var combo
fitness.ls3 <- do.call(rbind, fitness.ls2)
fitness.ls3 <- fitness.ls3 %>% dplyr::distinct(ID, .keep_all = T)
# repeat with reference
reference.ls3 <- do.call(rbind, reference.ls2)
reference.ls3 <- reference.ls3 %>% dplyr::distinct(ID, .keep_all = T)
# combine!
ref_fit.df <- left_join(fitness.ls3, reference.ls3, by = c("cue" = "cue", "log"= "log"))
compute proportion fitness and variation
ref_fit.df2 <- ref_fit.df %>%
mutate(p_sd = sd_fitness/ref_sd_fitness,
p_mean = ref_mean_fitness/mean_fitness,
cue_log = paste0(cue, "_", log),
label = case_when(
cue == "G" ~ "Gametocyte",
cue == "I" ~ "Asexual iRBC",
cue == "I+Ig" ~ "Asexual&sexual\niRBC",
cue == "Ig" ~ "Sexual iRBC",
cue == "R" ~ "RBC"
),
parameter = case_when(
rand_var.x == "rho" ~ "ρ",
rand_var.x == "phin" ~ "ϕn",
rand_var.x == "phiw"~ "ϕw",
rand_var.x == "psin" ~ "ψn",
rand_var.x == "psiw" ~ "ψw",
rand_var.x == "beta" ~ "β"
))
plot!
# variation
mc_b <- ggplot() +
geom_tile(data = ref_fit.df2 , aes(x = label, y = parameter, fill = p_sd)) +
facet_wrap(~log) +
theme_bw() +
viridis::scale_fill_viridis() +
labs(x = "Cue", y = "Parameter randomized", fill = expression(frac(sd("1 parameter randomized"), sd("all parameters randomized")))) +
theme(legend.position="top",
axis.text.x = element_text(angle = 45, hjust=1))
# mean fitness
mc_c <- ggplot() +
geom_tile(data = ref_fit.df2 , aes(x = label, y = parameter, fill = p_mean)) +
facet_wrap(~log) +
theme_bw() +
viridis::scale_fill_viridis() +
labs(x = "Cue", y = "Parameter randomized", fill = expression(frac(Mean("all parameters randomized"), Mean("1 parameter randomized")))) +
theme(legend.position="top",
axis.text.x = element_text(angle = 45, hjust=1))
mc_partition <- ggarrange(mc_b, mc_c, ncol = 1)
#————– get violine plot of variation in fitness ——————–# # read MC data
# read in dymamics
mc_G_log.dyn <- read_parquet(here("data/MC2/mc_G_log_dyn.parquet"))
mc_G.dyn <- read_parquet(here("data/MC2/mc_G_dyn.parquet"))
mc_R_log.dyn <- read_parquet(here("data/MC2/mc_R_log_dyn.parquet"))
mc_R.dyn <- read_parquet(here("data/MC2/mc_R_dyn.parquet"))
mc_I_log.dyn <- read_parquet(here("data/MC2/mc_I_log_dyn.parquet"))
mc_I.dyn <- read_parquet(here("data/MC2/mc_I_dyn.parquet"))
mc_Ig_log.dyn <- read_parquet(here("data/MC2/mc_Ig_log_dyn.parquet"))
mc_Ig.dyn <- read_parquet(here("data/MC2/mc_Ig_dyn.parquet"))
mc_I_Ig_log.dyn <- read_parquet(here("data/MC2/mc_I+Ig_log_dyn.parquet"))
mc_I_Ig.dyn <- read_parquet(here("data/MC2/mc_I+Ig_dyn.parquet"))
# read in fitness
mc_G_log.fitness <- read.csv(here("data/MC2/mc_G_log_fitness.csv"))
mc_G.fitness <- read.csv(here("data/MC2/mc_G_fitness.csv"))
mc_R_log.fitness <- read.csv(here("data/MC2/mc_R_log_fitness.csv"))
mc_R.fitness <- read.csv(here("data/MC2/mc_R_fitness.csv"))
mc_I_log.fitness <- read.csv(here("data/MC2/mc_I_log_fitness.csv"))
mc_I.fitness <- read.csv(here("data/MC2/mc_I_fitness.csv"))
mc_Ig_log.fitness <- read.csv(here("data/MC2/mc_Ig_log_fitness.csv"))
mc_Ig.fitness <- read.csv(here("data/MC2/mc_Ig_fitness.csv"))
mc_I_Ig_log.fitness <- read.csv(here("data/MC2/mc_I+Ig_log_fitness.csv"))
mc_I_Ig.fitness <- read.csv(here("data/MC2/mc_I+Ig_fitness.csv"))
examine variation
# quantify variance and mean
fitness_var.df <- fitness.df %>%
dplyr::group_by(id) %>%
dplyr::summarise(median = median(max_fitness)) %>%
dplyr::mutate(id = forcats::fct_reorder(id, mean))
Error in `mutate_cols()`:
! Problem with `mutate()` column `id`.
ℹ `id = forcats::fct_reorder(id, mean)`.
✖ length(f) == length(.x) is not TRUE
Caused by error in `forcats::fct_reorder()`:
! length(f) == length(.x) is not TRUE
Backtrace:
1. ... %>% dplyr::mutate(id = forcats::fct_reorder(id, mean))
7. forcats::fct_reorder(id, mean)
8. base::stopifnot(length(f) == length(.x))
9. base::stop(simpleError(msg, call = if (p <- sys.parent(1L)) sys.call(p)))
plot violin with difference in deterministic model fitness and mean model fitness

#————– plot together———————–#

#=========================================================# # time series conversion rate for single and co-infection #=========================================================# #——— single infection conversion rate heat map————–# # process info for single infection
# get fintess
si_cr.df <- si_dyn.df %>%
filter(time <= 20 & variable == "cr")
# good cue bad cue
si_cue.dv <- si_fitness.df %>%
mutate(classification = case_when(
value > 9.2 ~ "Good performing",
value <= 9.2 ~ "Poor performing"
))
# join with classificaiton
si_cr.df2 <- si_cr.df %>%
left_join(select(si_cue.dv, id, classification, fitness_si = value), by = "id") %>%
left_join(ez_label, by = c("id" = "id_si"))
# split into top erforming and poor-performing cues
si_cr.high <- si_cr.df2 %>% filter(classification == "Good performing")
si_cr.poor <- si_cr.df2 %>% filter(classification == "Poor performing")
si_cue.dv
plot single infection conversion rate heatmap
# plot poor performing
si_cr.pl1 <- ggplot() +
geom_tile(data = si_cr.poor, aes(x = time, y = forcats::fct_reorder(ez_label_si, fitness_si), fill = value)) +
labs(x = "Time (days)", y = "Low performing\nsingle infection cues", fill = "Conversion rate") +
scale_fill_viridis_c(limits = c(0, 1)) +
xlim(1, 20) +
theme_bw()
# plot high perfomring
si_cr.pl2 <- ggplot() +
geom_tile(data = si_cr.high, aes(x = time, y = forcats::fct_reorder(ez_label_si, fitness_si), fill = value)) +
labs(x = "", y = "High performing\nsingle infection cues", fill = "Conversion rate") +
scale_fill_viridis_c(limits = c(0, 1)) +
xlim(1, 20) +
theme_bw()
—————–co-infection conversion rate heatmap———–
plot co-infeciton convesion rate heatmap
plot
i
Error: object 'i' not found
#——— assemble final figure ————–#
si_cr.pl <- ggarrange(si_cr.pl2, si_cr.pl1, common.legend = T, ncol = 1, nrow = 2)
ci_cr.pl <- ggarrange(ci_cr.pl2, ci_cr.pl1, common.legend = T, ncol = 1, nrow = 2)
ggarrange(si_cr.pl, ci_cr.pl, labels = c("A", "B"), ncol = 2, common.legend = T)
ggsave(units = "px", dpi = 300, width = 2000, height = 1000, filename = here("figures/plos-bio/time_cr.tiff"), bg = "white", scale = 1.7)
#——————————————–# # dual cue optimization figure #——————————————–#
source(here("functions/chabaudi_si_clean_high.R"))
source(here("functions/chabaudi_si_clean.R"))
source(here("functions/par_to_hm_te.R"))
#———- plotting fitness of dual vs single cue opt ———# # import in previous data
# dual cue fitness
dual_fitness.df <- read.csv(here("data/dual_cue_opt4/dual_cue_fitness_20.csv"))
## make label and filter out very low fitness
dual_fitness.df <- dual_fitness.df %>%
mutate(temp_label = gsub("log", "log10", label),
temp_label_b = gsub("log", "log10", label_b),
label_final = paste0(temp_label, "+", temp_label_b)) %>%
filter(value > 2)
# get single cue fitness
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet"))
si_fitness.df <- si_dyn.df %>%
filter(variable == "tau_cum" & time == 20)
# join si and dual cue
dual_si_fitness.df <- dual_fitness.df %>%
left_join(select(si_fitness.df, id, si_fitness = value), by = "id") %>%
left_join(select(si_fitness.df, id_b = id, si_fitness_b = value), by = "id_b") %>%
mutate(si_fitness_max = ifelse(si_fitness > si_fitness_b, si_fitness, si_fitness_b),
dual_label = gsub("log", "log10", paste(label, "+", label_b)))
plot

#———– time series conversion rate ————-# # dynamics simulation of high parameter cues (these serve as reference points)
# best dual cue combo
dual.cr <- chabaudi_si_clean(
parameters_cr = c(4.446192033, 10.97518275, 1.38762817, 23.3059254, -3.452052371, -18.0070692, 39.66614226, -3.545193141, 18.78350799),
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 20, by = 1e-3),
cue_range = seq(6, 7, by = 1/500),
cue_range_b = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/500),
cue = "R",
cue_b = "I",
log_cue = "log10",
log_cue_b = "log10",
solver = "vode",
dyn = T
)
Error in force(parameters) : object 'parameters_tsukushi' not found
plot
ggplot() +
geom_line(data = dual_cr.df, aes(color = label_new, x = time, y = value), size = 1) +
geom_point(data = dual_cr.df %>% filter(time%%1 == 0), aes(color = label_new, x = time, y = value, shape = label_new), size = 3) +
labs(x = "Time (days)", y = "Conversion rate", color = "Cue", shape = "Cue") +
xlim(0, 20) +
scale_color_manual(values = c("#fc8d59","#fdcb44","black", "#4575b4")) +
theme_bw() +
theme(legend.position="bottom") +
guides(color = guide_legend(nrow = 2, byrow = TRUE))
Warning: Removed 3 rows containing missing values (`geom_line()`).
Warning: Removed 3 rows containing missing values (`geom_point()`).

#———— reaction norm heatmap of R log10 + I log10 ————# # process data
max(R_Ig.dyn$Ig)
[1] 175754.2
plot
# just testing for sexual iRBC vs RBC
ggplot() +
geom_path(data = R_Ig.dyn, aes(x = Ig, y = log_R), color = "white", arrow = arrow(angle = 30, length = unit(0.1, "inches"))) +
geom_point(data = R_Ig.dyn %>% filter(row_number() %% 1000 == 1 & time <= 20), aes(x = Ig, y = log_R), color = "white") +
scale_x_continuous(trans = "log10") +
xlim(0, 200000) +
labs(y = "RBC log10", x = "Sexual iRBC log10", fill = "Conversion rate") +
theme_dark()
Scale for x is already present.
Adding another scale for x, which will replace the existing scale.

save figure for poster
dual_rn.pl2 <- ggplot() +
geom_raster(data = R_I.hm, aes(x = cue_range_b, y = cue_range, fill = cr)) +
scale_fill_viridis_c() +
geom_path(data = R_I.dyn, aes(x = log_I, y = log_R), color = "white", arrow = arrow(angle = 30, length = unit(0.1, "inches"))) +
geom_point(data = R_I.dyn %>% filter(row_number() %% 1000 == 1 & time <= 20), aes(x = log_I, y = log_R), color = "white") +
xlim(0.99*min(hablar::s(R_I.dyn$log_I), na.rm = T), 1.01* max(hablar::s(R_I.dyn$log_I), na.rm = T)) +
ylim(0.99*min(hablar::s(R_I.dyn$log_R), na.rm = T),1.01* max(hablar::s(R_I.dyn$log_R), na.rm = T)) +
labs(y = "RBC log10", x = "Asexual iRBC log10", fill = "Conversion rate") +
theme_dark() + theme(legend.position="top")
dual_cr.pl2 <- ggplot() +
geom_line(data = dual_cr.df, aes(color = label_new, x = time, y = value), size = 1) +
geom_point(data = dual_cr.df %>% filter(time%%1 == 0), aes(color = label_new, x = time, y = value, shape = label_new), size = 2) +
labs(x = "Time (days)", y = "Conversion rate", color = "Cue", shape = "Cue") +
xlim(0, 20) +
scale_color_manual(values = c("#4575b4", "#91bfdb","#fc8d59","#fdcb44")) +
theme_bw() +
theme(legend.position="top") +
guides(color = guide_legend(nrow = 2, byrow = TRUE))
ggarrange(dual_cr.pl2, dual_rn.pl2, align = "h", widths = c(1.25, 1))
ggsave(here("poster/dual_cue.png"), width = 7, height = 4)
#——– assemble final figure ————-#

#============================================# # real life disease curve #============================================# # execute code from report 10 to get final dataset The following graphs will be made: - R vs iRBC - R log10 vs iRBC - R vs iRBC log10 - R log10 vs iRBC log10
G vs iRBC
G log10 vs iRBC
G vs iRBC log10
G log10 vs iRBC log10
R vs G
R log10 vs G
R vs G log10
R log10 vs G log10
R on y-axis
r_i.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = RBC, x = asex)) +
geom_path(aes(y = RBC, x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "iRBC per µL", y = "RBC per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
rlog_i.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = log10(RBC), x = asex)) +
geom_path(aes(y = log10(RBC), x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "iRBC per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
r_ilog.dc <-ggplot(exp_ss.df) +
geom_point(aes(y = RBC, x = log10(asex))) +
geom_path(aes(y = RBC, x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "log10(iRBC) per µL", y = "RBC per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
rlog_ilog.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = log10(RBC), x = log10(asex))) +
geom_path(aes(y = log10(RBC), x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "log10(iRBC) per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
G on y-axis
g_i.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = gam, x = asex)) +
geom_path(aes(y = gam, x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "iRBC per µL", y = "Gametocyte per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
glog_i.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = log10(gam), x = asex)) +
geom_path(aes(y = log10(gam), x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "iRBC per µL", y = "log10(Gametocyte) per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
g_ilog.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = gam, x = log10(asex))) +
geom_path(aes(y = gam, x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "log10(iRBC) per µL", y = "Gametocyte per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
glog_ilog.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = log10(gam), x = log10(asex))) +
geom_path(aes(y = log10(gam), x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "log10(iRBC) per µL", y = "log10(Gametocyte) per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
R vs G
r_g.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = RBC, x = gam)) +
geom_path(aes(y = RBC, x = gam, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "Gametocyte per µL", y = "RBC per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
rlog_g.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = log10(RBC), x = gam)) +
geom_path(aes(y = log10(RBC), x = gam, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "Gametocyte per µL", y = "log10(RBC per µL)", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
r_glog.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = RBC, x = log10(gam))) +
geom_path(aes(y = RBC, x = log10(gam), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "log10(Gametocyte) per µL", y = "RBC per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
rlog_glog.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = log10(RBC), x = log10(gam))) +
geom_path(aes(y = log10(RBC), x = log10(gam), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "log10(Gametocyte) per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
plot together


#===========================================# # static competition #===========================================# #——- heat map —————# # calculate fitness difference for 20 days
static.ls <- lapply(static.ls, read_parquet)
Error: file must be a "InputStream"
plot
ggarrange(static.pl1, static.pl2, align = "hv", widths = c(1, 0.2))
Warning: Graphs cannot be vertically aligned unless the axis parameter is set. Placing graphs unaligned.

ggarrange(static.pl1, static.pl2, align = "hv", widths = c(1, 0.2))
Warning: Graphs cannot be vertically aligned unless the axis parameter is set. Placing graphs unaligned.
ggsave(here("figures/plos-bio/static_competition_a.tiff"), units = "px", width = 2250, height = 1500, scale = 1.2, dpi=300, bg = "white")

#—— effect cue perception ——-# ## logging
# get non-logged pairings
static_nolog <- static.df2 %>%
mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
log_1 = case_when(
str_detect(label_ci_1, "log") ~ "log",
str_detect(label_ci_1, "log", negate = T) ~ "none")) %>%
filter(log_1 == "none")
static_log <- static.df2 %>%
mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
log_1 = case_when(
str_detect(label_ci_1, "log") ~ "log",
str_detect(label_ci_1, "log", negate = T) ~ "none")) %>%
filter(log_1 == "log")
static_log.df <- left_join(
select(static_nolog, cue_1, label_ci_2, log_1, None = fitness_difference),
select(static_log, cue_1, label_ci_2, log_1, Log = fitness_difference),
by = c("cue_1", "label_ci_2")) %>%
filter(!is.na(None) & !is.na(Log)) %>%
mutate(classification = ifelse(Log > None, "Logged better", "Not logged better"))
combined
static_nocomb <- static.df2 %>%
mutate(cue_1 = ifelse(
str_detect(label_ci_1, "sum"), "I+Ig",
trimws(gsub("+1.*|\\ log", "", label_ci_1))
),
log_1 = case_when(
str_detect(label_ci_1, "log") ~ "log",
str_detect(label_ci_1, "log", negate = T) ~ "none"),
comb_1 = case_when(
str_detect(label_ci_1, "1+|sum") ~ "comb",
str_detect(label_ci_1, "1+|sum", negate = T) ~ "none"
)) %>%
filter(comb_1 == "none")
static_comb <- static.df2 %>%
mutate(cue_1 = ifelse(
str_detect(label_ci_1, "sum"), "I+Ig",
trimws(gsub("+1.*|\\ log", "", label_ci_1))
),
log_1 = case_when(
str_detect(label_ci_1, "log") ~ "log",
str_detect(label_ci_1, "log", negate = T) ~ "none"),
comb_1 = case_when(
str_detect(label_ci_1, "1+|sum") ~ "comb",
str_detect(label_ci_1, "1+|sum", negate = T) ~ "none"
)) %>%
filter(comb_1 == "comb")
static_comb.df <- left_join(
select(static_nocomb, cue_1, label_ci_2, log_1, Self = fitness_difference),
select(static_comb, cue_1, label_ci_2, log_1, Total = fitness_difference),
by = c("cue_1", "log_1", "label_ci_2")) %>%
filter(!is.na(Total) & !is.na(Self)) %>%
mutate(classification = ifelse(Total > Self, "Total better", "Self better"))
plot

ggarrange(static_log.pl, static_comb.pl, ncol = 1, nrow = 2, align = "v")
ggarrange(static_log.pl, static_comb.pl, ncol = 1, nrow = 2, align = "v")
ggsave(here("figures/plos-bio/static_competition_b.tiff"), units = "px", width = 1000, height = 2000, scale = 1.2, dpi=300, bg = "white")

#===========================================# # invasion analysis #===========================================# # import in data (already 20 days )
invade.df <- read.csv(here("data/ci_invasion.csv"))
process data for invasion matrix
invade.mat <- invade.df %>%
group_by(V1 = pmin(mut_id, res_id), V2 = pmax(mut_id, res_id)) %>% # group by cue competition, irregardless of order
mutate(id_alt = paste0(V1, V2),
invade = case_when(
fitness > 0 ~ "invade",
fitness < 0 ~ "not invade"
)) %>%
group_by(id_alt) %>%
mutate(
mut_is_V1 = case_when(
mut_id == V1 ~ "V1_invade",
mut_id != V1 ~ "V1_invaded")) %>%
arrange(id_alt) %>%
select(fitness, V1, V2, id_alt, invade, mut_is_V1) %>%
tidyr::pivot_wider(names_from = mut_is_V1, values_from = fitness) %>%
group_by(id_alt) %>%
mutate(V1_invade2 = gsub("NA", "", paste0(V1_invade, collapse = "")),
V1_invaded2 = gsub("NA", "", paste0(V1_invaded, collapse = ""))) %>%
distinct(id_alt, .keep_all = T) %>%
mutate(
category = case_when(
V1_invade2 > 0 & V1_invaded2 > 0 ~ "Mutual invasion",
V1_invade2 > 0 & V1_invaded2 < 0 ~ "Only strain 1 invasion",
V1_invade2 < 0 & V1_invaded2 > 0 ~ "Only strain 2 invasion",
V1_invade2 < 0 & V1_invaded2 < 0 ~ "Mutual non-invasion"
)) %>%
select(V1, V2, invasion = category)
invade.df %>% filter(mut_id == "G-i_none")
invade.df %>% filter(res_id == "G-i_none")
invade.mat4 <- rbind(
select(invade.mat3, V1_label, V2_label, invasion),
select(invade.mat3, V2_label = V1_label, V1_label = V2_label) %>% mutate(invasion = NA)) %>%
mutate(
invasion_2 = case_when(
invasion == "Mutual invasion" ~ "Mutual invasion",
invasion == "Only strain 1 invasion" ~ "Competitive exclusion\nof another cue",
invasion == "Only strain 2 invasion" ~ "Competitive exclusion\nby another cue"
)) %>%
filter(!is.na(V1_label))
Adding missing grouping variables: `id_alt`
Adding missing grouping variables: `id_alt`
plot invasion matrix

create summary bar chart
# create a stacked barchart for summary
## filter out na
invade.matalt <- invade.mat3 %>% na.exclude()
# get frquency from both sides. Note when grouping for V2, from the perspective of cue 2, scenarrio when strain 2 invade = strain 1 invade
invade.matalt1 <- invade.matalt %>% group_by(V1_label, invasion) %>%
summarize(frequency_1 = n())
invade.matalt2 <- invade.matalt %>%
mutate(invasion_alt = case_when(
invasion == "Only strain 1 invasion" ~ "Only strain 2 invasion",
invasion == "Only strain 2 invasion" ~ "Only strain 1 invasion",
invasion == "Mutual invasion" ~ "Mutual invasion",
invasion == "Mutual non-invasion" ~ "Mutual non-invasion"
)) %>%
group_by(V2_label, invasion_alt) %>%
summarize(frequency_2 = n())
# full join and sum. has confirmed all of them add up to 14
invade.matalt3 <- full_join(invade.matalt1, invade.matalt2, by = c("V1_label" = "V2_label", "invasion" = "invasion_alt"))
invade.matalt3[is.na(invade.matalt3)] <- 0
invade.matalt4 <- invade.matalt3 %>%
mutate(freq = frequency_1 + frequency_2) %>%
mutate(temp = case_when(
invasion == "Only strain 1 invasion" ~ freq
)) %>%
group_by(V1_label) %>%
mutate(invade_1_freq = max(temp, na.rm = T)) %>%
mutate(invasion_2 = case_when(
invasion == "Mutual invasion" ~ "Mutual invasion",
invasion == "Only strain 1 invasion" ~ "Competitive exclusion\nof another cue",
invasion == "Only strain 2 invasion" ~ "Competitive exclusion\nby another cue"
))

plot together


ggarrange(invasion.pl1, invasion.pl2, align = "h", common.legend = T, widths = c(2, 1))
ggarrange(invasion.pl1, invasion.pl2, align = "h", common.legend = T, widths = c(2, 1))
ggsave(here("figures/plos-bio/invasion_a.tiff"), units = "px", width = 2250, height = 1100, scale = 1.4, dpi=300, bg = "white")

#—————- invasion pairwise comparison—————–# ## proces data
combined
invade_nocomb <- invade.df2 %>%
mutate(cue_1 = ifelse(
str_detect(label_ci_1, "sum"), "I+Ig",
trimws(gsub("+1.*|\\ log", "", label_ci_1))
),
log_1 = case_when(
str_detect(label_ci_1, "log") ~ "log",
str_detect(label_ci_1, "log", negate = T) ~ "none"),
comb_1 = case_when(
str_detect(label_ci_1, "1+|sum") ~ "comb",
str_detect(label_ci_1, "1+|sum", negate = T) ~ "none"
)) %>%
filter(comb_1 == "none")
invade_comb <- invade.df2 %>%
mutate(cue_1 = ifelse(
str_detect(label_ci_1, "sum"), "I+Ig",
trimws(gsub("+1.*|\\ log", "", label_ci_1))
),
log_1 = case_when(
str_detect(label_ci_1, "log") ~ "log",
str_detect(label_ci_1, "log", negate = T) ~ "none"),
comb_1 = case_when(
str_detect(label_ci_1, "1+|sum") ~ "comb",
str_detect(label_ci_1, "1+|sum", negate = T) ~ "none"
)) %>%
filter(comb_1 == "comb")
invade_comb.df <- left_join(
select(invade_nocomb, cue_1, res_id, log_1, Self = fitness),
select(invade_comb, cue_1, res_id, log_1, Total = fitness),
by = c("cue_1", "log_1", "res_id")) %>%
filter(!is.na(Total) & !is.na(Self)) %>%
mutate(classification = ifelse(Total > Self, "Total better", "Self better"))
invade_comb.df
plot

ggarrange(invade_log.pl, invade_comb.pl, align = "h", ncol = 2)
ggarrange(invade_log.pl, invade_comb.pl, align = "h", ncol = 2)
ggsave(here("figures/plos-bio/invasion_b.tiff"), units = "px", width = 2250, height = 850, scale = 1.2, dpi=300, bg = "white")

#===========================================# # Cue performance across single, co-infection, static, and invasion #===========================================#
plot

#-=====================# # Partitioning best cue #=====================-# #——- single infection ———–# # redo some optimization (lower fitness in no R than default)
source(here("functions/chabaudi_si_clean_R.R"))
source(here("functions/chabaudi_si_clean_N.R"))
# I none
cl <- makeCluster(detectCores()); setDefaultCluster(cl = cl)
I_no_R <- optimParallel(
par = rep(0.5,4), # start at 0.5x4
fn = chabaudi_si_clean_R,
control = list(trace = 6, fnscale = -1),
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 20, by = 1e-3),
cue_range = seq(0, 6*(10^6), by = (6*(10^6))/5000),
cue = "I",
log_cue = "none",
solver = "vode")
stopCluster(cl)
# 0.144021 -43.1046 2030.27 -524.686
# 8.69589
import and process data
# import in data
si_partition.ls <- list.files(path = here("data/partition/si/"), pattern = "*.csv", full.names = T)
si_partition.ls <- lapply(si_partition.ls, read.csv)
si_partition.df <- do.call(rbind, si_partition.ls)
# combine with si fitness (default)
si_partition.df <- si_partition.df %>% left_join(select(si_fitness.df, id, fitness = value), by = "id")
# make longer
si_partition.df2 <- tidyr::pivot_longer(si_partition.df, c(fitness_R, fitness_N, fitness_W, fitness))
# calculate coefficient of variation. Also rename
si_partition.df2 <- si_partition.df2 %>%
group_by(name) %>%
mutate(cv = sd(value)/mean(value)*100,
mean = mean(value),
category = case_when(
name == "fitness_R" ~ "No RBC limitation",
name == "fitness_W" ~ "No targeted immunity",
name == "fitness_N" ~ "No indiscriminate\nimmunity",
name == "fitness" ~ "Default"
))
plot
library(ungeviz)
# raw fitness
si_partition.pl1 <- ggplot() +
geom_vpline(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = mean, group = category, color = category), show.legend = F, size = 1) +
geom_point(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = value), size = 2, alpha = 0.7) +
geom_line(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = value, group = id), alpha = 0.2) +
labs(x = "Fitness", y = "Conditions") +
theme_bw()
# coefficient of variation
si_partition.pl2 <- ggplot() +
geom_bar(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = cv), stat = "identity") +
labs(x = "Coefficient of\nvariation (%)", y = "") +
theme_bw() +
theme(axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())
si_partition.pl <- ggarrange(si_partition.pl1, si_partition.pl2, widths = c(1, 0.3), align = "h")
si_partition.pl
ggsave(here("figures/plos-bio/partition_fitness.tiff"), width = 7, height = 4)
#——- consequences of no targeted immunity ————# # get dynamics of no targeted immunity
get_dyn <- function(df){
source(here("functions/chabaudi_si_clean_W.R"))
id <- df$id
cue <- df$cue
log <- df$log
par <- c(df$var_W1, df$var_W2, df$var_W3, df$var_W4)
cue_range <- seq(df$low, df$high, by = df$by)
# get dynamics
dyn <- chabaudi_si_clean_W(
parameters_cr = par,
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 20, by = 1e-3),
cue_range = cue_range,
cue = cue,
log_cue = log,
solver = "vode",
dyn = T
)
# combine
dyn2 <- cbind(dyn, id = id, cue = cue, log = log)
write_parquet(dyn2, paste0(here("data/partition/si_dyn/"), id, "_noW_dyn.parquet"))
}
get df to run
# join with cue_range
cue_range_si.df <- read.csv(here("data/cue_range_si.csv"))
si_partition.df3 <- si_partition.df %>% left_join(select(cue_range_si.df, low, high, by, id), "id")
# lapply loop
si_partition.ls <- split(si_partition.df3, seq(nrow(si_partition.df3)))
mclapply(si_partition.ls, get_dyn)
process dataframe
# import in dataframe
no_W.ls <- list.files(here("data/partition/si_dyn/"), pattern = "*noW_dyn.parquet", full.names = T)
no_W.df <- lapply(no_W.ls, read_parquet)
no_W.df <- do.call(rbind, no_W.df)
# combine with ez label
ez_label <- read.csv(here("data/ez_label.csv"))
no_W.df <- left_join(no_W.df, ez_label, by = c("id" = "id_si"))
# get conversion rate
no_W.cr <- no_W.df %>% filter(variable == "cr")
no_W.I <- no_W.df %>% filter(variable == "I")
# get default conversion rate dynamics
si_dyn.df <- left_join(si_dyn.df, ez_label, by = c("id" = "id_si"))
si_dyn.cr <- si_dyn.df %>% filter(variable == "cr")
si_dyn.I <- si_dyn.df %>% filter(variable == "I")
plot conversion rate
mechanism: targeted immunity led to lower parasite density in the initial stages, which prevents parasites from making the switch from no conversion rate to high conversion rate. When parsite density undergoes drastic increase at the beginning due to lower immunity, this presents a higher degree of signal that allows parasite to make the switch appropriately
partition_cr.pl <- ggplot() +
geom_line(data = no_W.cr, aes(x = time, y= value, color = "No targeted immunity")) +
geom_line(data = si_dyn.cr, aes(x = time, y= value, color = "Default")) +
facet_wrap(~ez_label_si, ncol = 5) +
xlim(0, 20) +
geom_vline(xintercept = 7) +
labs(x = "Time (days)", y = "Conversion rate", color = "Condition") +
theme_bw()
no_W.cr
#—– cue state ————–#
function to get cue states
# function to get cue states
get_cue_state <- function(df){
cue <- trimws(gsub("_log|_none", "", unique(df$id)))
if(cue != "I+Ig"){
df2 <- df %>% filter(variable == cue)
if(str_detect(unique(df$id), "log")){
df2 <- df2 %>%
mutate(value = log10(value))
}
}
if(cue == "I+Ig"){
df2 <- df %>% filter(variable %in% c("I", "Ig")) %>%
group_by(time) %>%
mutate(value = sum(value))
if(str_detect(unique(df$id), "log")){
df2 <- df2 %>%
mutate(value = log10(value))
}
}
df2$value[df2$value == -Inf] <- 0
write_parquet(df2, paste0(here("data/partition/si_default_state/"), unique(df$id), "_noW_state.parquet"))
}
run function
# split dynamics based on id
no_W.split <- split(no_W.df, no_W.df$id)
# run function
mclapply(no_W.split, get_cue_state)
# get dataframe
no_W.state <- list.files(here("data/partition/si_state/"), pattern = "*.parquet", full.names = T)
no_W.state <- lapply(no_W.state, read_parquet)
no_W.state <- do.call(rbind, no_W.state)
no_W.state$value[no_W.state$value < 0] <- 0
# get same for si infection
default.split <- split(si_dyn.df, si_dyn.df$id)
mclapply(default.split, get_cue_state)
default.state <- list.files(here("data/partition/si_default_state/"), pattern = "*.parquet", full.names = T)
default.state <- lapply(default.state, read_parquet)
default.state <- do.call(rbind, default.state)
default.state$value[default.state$value < 0] <- 0
# manually correct non-logging
I_Ig.corr <- no_W.state %>% filter(id == "I+Ig_log") %>%
mutate(value = log10(value))
I_Ig.corr$value[I_Ig.corr$value < 0] <- 0
no_W.state2 <- no_W.state %>% filter(id != "I+Ig_log")
no_W.state2 <- no_W.state2 %>% rbind(no_W.state2, I_Ig.corr)
plot
absence of targeted immunity led to drastic increase in parasite density in early phases of infection. This produces high signal intensity for parasite and host-based cues, especially non-logged ones, which allows for state differentation. While this can be viewed as a modelling artifiact, it should be noted that the logged cues seldom changed as these changes in early infection did little to alter the actual early signal intensity sensed by the parasite. In an environment where there is heterogeneity in host response, and thus, signal, logging allows for parasites to adapt optimal strategy whereas non-logged cues must contend with sensitivity to immunity.
# function to individually plot stuff
plot_state <- function(df1, df2){
# plot state dynamics
state_pl <- ggplot() +
geom_line(data = df1, aes(x = time, y = value, color = name, group = name)) +
facet_wrap(~ez_label_si, scales = "free") +
xlim(1,20) +
theme_bw() +
theme(legend.position="none") +
labs(x = "", y = "Cue") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE)) +
scale_color_manual(values =c("Default" = "#4575b4", "No targeted\nimmunity" = "#fc8d59"))
# plot conversion rate dynamics
cr_pl <- ggplot() +
geom_raster(data = df2, aes(x = time, y = name, fill = value)) +
xlim(1,20) +
theme_bw() +
labs(x = "Time (days)") +
theme(axis.title.y=element_blank(),
axis.ticks.y=element_blank(),
legend.position = "none") +
scale_fill_viridis_c(lim = c(0, 1))
# arrange
ggarrange(state_pl, cr_pl, ncol = 1, nrow = 2, align = "v", heights = c(1, 0.4))
ggsave(paste0(here("figures/plos-bio/partition/"), unique(df1$id), ".tiff"), width = 4.5, height = 3.5)
}
split
# combine state
noW_default.state <- left_join(
select(no_W.state2, time, `No targeted\nimmunity` = value, id, ez_label_si),
select(default.state %>% filter(time <= 20), time, `Default` = value, id, ez_label_si), by = c("time", "id", "ez_label_si"))
noW_default.state2 <- tidyr::pivot_longer(noW_default.state, c(`No targeted\nimmunity`, `Default`))
# combine conversion raster
noW_default.cr <- left_join(
select(no_W.cr, time, `No targeted\nimmunity` = value, id, ez_label_si),
select(si_dyn.cr %>% filter(time <= 20), time, `Default` = value, id, ez_label_si), by = c("time", "id", "ez_label_si"))
noW_default.cr2 <- tidyr::pivot_longer(noW_default.cr, c(`No targeted\nimmunity`, `Default`))
# split
noW_default_state.ls <- split(noW_default.state2, noW_default.state2$id)
noW_default_cr.ls <- split(noW_default.cr2, noW_default.cr2$id)
# run function
mapply(plot_state, noW_default_state.ls, noW_default_cr.ls)
#——– reaction norms of default vs optimized ————# # get reaction norm and rug data
source(here("functions/par_to_df.R"))
# Gametocyte
g_log.rn <- par_to_df(par = c(1.211521, -3.936778, -1.312944, -1.285713), cue_range = seq(0, log10(6*(10^4)), by = (log10(6*(10^4)))/5000))
g_log.rn2 <- par_to_df(par = c(1.393860539, -4.253007616, -0.313947029, -2.000857344), cue_range = seq(0, log10(6*(10^4)), by = (log10(6*(10^4)))/5000))
g.rn <- par_to_df(par = c(0.04061288, -9.31445958, 74.13015506, -431.5984364), cue_range = seq(0, 6*(10^4), by = (6*(10^4))/5000))
g.rn2 <- par_to_df(par = c(0.541729073, -3.904616443, 0.87487412, -0.694177021), cue_range = seq(0, 6*(10^4), by = (6*(10^4))/5000))
# I+Ig
I_Ig_log.rn <- par_to_df(par = c(3.594042, 4.157744, -13.530672, 2.599905), cue_range = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000))
I_Ig_log.rn2 <- par_to_df(par = c(63.71893822, -87.77671601, -56.55475514, -66.02209549), cue_range = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000))
I_Ig.rn <- par_to_df(par = c(0.3159297, -46.1104558, 1250.752908, -6.1982093), cue_range = seq(0, 6*(10^6), by = (6*(10^6))/5000))
I_Ig.rn2 <- par_to_df(par = c(0.731982784, -21.69799449, 149.7841876, 17.02551769), cue_range = seq(0, 6*(10^6), by = (6*(10^6))/5000))
# convert log to non-logged scale
g_log.rn$cue_range <- 10^(g_log.rn$cue_range)
g_log.rn2$cue_range <- 10^(g_log.rn2$cue_range)
I_Ig_log.rn$cue_range <- 10^(I_Ig_log.rn$cue_range)
I_Ig_log.rn2$cue_range <- 10^(I_Ig_log.rn2$cue_range)
# get rug
g_log.rug <- default.state %>%
filter(label_si == "G log") %>%
mutate(value = 10^value) %>%
select(label_si, value)
g_log.rug2 <- no_W.state %>%
filter(label_si == "G log") %>%
mutate(value = 10^value) %>%
filter(value <= 6*(10^4)) %>%
select(label_si, value)
I_Ig_log.rug <- default.state %>%
filter(label_si == "I+Ig log") %>%
select(label_si, value)
I_Ig_log.rug2 <- no_W.state %>%
filter(label_si == "I+Ig log") %>%
select(label_si, value)
g.rug <- default.state %>%
filter(label_si == "G") %>%
select(label_si, value)
g.rug2 <- no_W.state %>%
filter(label_si == "G" & value <= 6*(10^4)) %>%
select(label_si, value)
I_Ig.rug <- default.state %>%
filter(label_si == "I+Ig") %>%
select(label_si, value)
I_Ig.rug2 <- no_W.state %>%
filter(label_si == "I+Ig") %>%
select(label_si, value)
# get rug limits
rug_lim <- rbind(g_log.rug,
g_log.rug2,
I_Ig_log.rug,
I_Ig_log.rug2,
g.rug,
g.rug2,
I_Ig.rug,
I_Ig.rug2) %>%
group_by(label_si) %>%
summarize(max = max(hablar::s(value), na.rm = T),
min = min(hablar::s(value), na.rm = T))
# combine and filter
rn <- rbind(
cbind(g_log.rn, label_si = "G log", condition = "Default"),
cbind(g_log.rn2, label_si = "G log", condition = "No targeted\nimmunity"),
cbind(g.rn, label_si = "G", condition = "Default"),
cbind(g.rn2, label_si = "G", condition = "No targeted\nimmunity"),
cbind(I_Ig_log.rn, label_si = "I+Ig log", condition = "Default"),
cbind(I_Ig_log.rn2, label_si = "I+Ig log", condition = "No targeted\nimmunity"),
cbind(I_Ig.rn, label_si = "I+Ig", condition = "Default"),
cbind(I_Ig.rn2, label_si = "I+Ig", condition = "No targeted\nimmunity")
) %>%
left_join(rug_lim, by = "label_si") %>%
group_by(label_si) %>%
filter(cue_range <= max & cue_range >= min)
# combine rug
rug <- rbind(cbind(g_log.rug, condition = "Default"),
cbind(g_log.rug2, condition = "No targeted\nimmunity"),
cbind(g.rug, condition = "Default"),
cbind(g.rug2, condition = "No targeted\nimmunity"),
cbind(I_Ig_log.rug, condition = "Default"),
cbind(I_Ig_log.rug2, condition = "No targeted\nimmunity"),
cbind(I_Ig.rug, condition = "Default"),
cbind(I_Ig.rug2, condition = "No targeted\nimmunity"))
# cobine with ezlabel
rn2 <- rn %>% left_join(ez_label, by = "label_si")
rug2 <- rug %>% left_join(ez_label, by = "label_si")
# filter rug
default.rug <- rug2 %>% filter(condition == "Default")
no.rug <- rug2 %>% filter(condition == "No targeted\nimmunity")
plot
ggplot() +
geom_line(data = rn2, aes(x = cue_range, y = cr, color = condition)) +
geom_rug(data = default.rug, aes(x = value, color = condition), sides = "b") +
geom_rug(data = no.rug, aes(x = value, color = condition), sides = "t") +
facet_wrap(~fct_relevel(ez_label_si, c("Gametocyte log10", "Gametocyte", "Asexual&sexual\niRBC log10", "Asexual&sexual iRBC")), scales = "free_x") +
scale_x_continuous(labels = function(x) format(x, scientific = TRUE)) +
theme_bw() +
scale_color_manual(values =c("Default" = "#4575b4", "No targeted\nimmunity" = "#fc8d59")) +
ylim(0, 1.1) +
labs(x = "Cue range", y = "Conversion rate", color = "Condition")
ggsave(here("figures/plos-bio/partition_rn.tiff"), width = 7.5, height = 6)
get conversion rate legend
noW_default.cr %>% filter(id == "G_log") %>%
ggplot() +
geom_raster(aes(x = time, y = id, fill = Default)) +
xlim(1,20) +
theme_bw() +
labs(x = "Time (days)",
fill = "Conversion rate") +
theme(axis.title.y=element_blank(),
axis.ticks.y=element_blank(),) +
scale_fill_viridis_c(lim = c(0, 1))
ggsave(here("figures/plos-bio/cr_legend.tiff"))
#================================# # Disease curves for single, co-infection, and invasion #===============================# # get data for disease curves
# single infection dynamics
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet"))
# co-infection dynamics (mon-cue)
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))
# dual cue dynamics
dual_dyn.df <- read_parquet(here("data/dual_cue_dyn/dual_cue_dyn.parquet"))
#——- single cue comparison —————# # process data
# process dynamics -> turn skinny
si_dc.df <- si_dyn.df %>%
filter(variable == "I" | variable == "Ig" | variable == "R") %>%
tidyr::pivot_wider(names_from = variable, values_from = value) %>%
mutate(total = I+Ig)
# process dynamics -> turn skinny
si_dc.df <- si_dyn.df %>%
filter(variable == "I" | variable == "Ig" | variable == "R") %>%
tidyr::pivot_wider(names_from = variable, values_from = value) %>%
mutate(total = I+Ig)
plot

#———- co-infection monocue ————-#
# get relevent variables
ci_dc.df <- ci_dyn.df %>%
filter(variable == "I1" | variable == "Ig1" | variable == "R")
# morph into skinny format
ci_dc.df <- tidyr::pivot_wider(ci_dc.df, names_from = variable, values_from = value, id_cols = c(time, label)) %>%
mutate(total = I1+Ig1)
# good cue bad cue
ci_cue.dv <- ci_fitness.df %>%
mutate(classification = case_when(
value > 2.25 ~ "High-performing",
value <= 2.25 ~ "Poor-performing"
))
# join with classificaiton
ci_dc.df2 <- ci_dc.df %>% left_join(ci_cue.dv, by = "label")
# split into top erforming and poor-performing cues
ci_dc.high <- ci_dc.df2 %>% filter(classification == "High-performing")
ci_dc.poor <- ci_dc.df2 %>% filter(classification == "Poor-performing")
# join high performing with label
ci_dc.high2 <- ci_dc.high %>% left_join(ez_label, by = c("label" = "label_ci"))
#write_parquet(ci_dc.high2, here("data/disease_curve/ci_dc_high.parquet"))
#write_parquet(ci_dc.poor, here("data/disease_curve/ci_dc_poor.parquet"))
plot
ci_dc.poor <- read_parquet(here("data/disease_curve/ci_dc_poor.parquet"))
ci_dc.high2 <- read_parquet(here("data/disease_curve/ci_dc_high.parquet"))
# plot
ci_dc.pre <- ggplot() +
geom_path(data = ci_dc.poor, aes(x= total, y = R, group = label), color = "dark grey", arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
labs(color = "Co-infection\ngood performing cues", x = "Asexual & sexual iRBC per µL", y = "RBC per µL") +
theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
guides(shape = FALSE)
ci_dc.pl <- ci_dc.pre +
geom_point(data = ci_dc.high2 %>% filter(row_number() %% 1000 ==0), aes(x = total, y = R, color = ez_label, shape = ez_label), size = 3) +
geom_path(data = ci_dc.high2, aes(x= total, y = R, group = ez_label, color = ez_label), size = 1, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
scale_color_manual(values=c( "#4575b4", "#fc8d59", "#fdcb44", "#91bfdb")) +
guides(color = guide_legend(override.aes = list(size = 0.1)))
#——— dual cue ————————–# # process data
plot
dual_dc.high <- read_parquet(here("data/disease_curve/dual_dc_high.parquet"))
dual_dc.poor <- read_parquet(here("data/disease_curve/dual_dc_poor.parquet"))
dual_dc.high2 <- dual_dc.high %>%
filter(label_alt == "R log10 + I log10")
# add
dual_dc.pre <- ggplot() +
geom_path(data = dual_dc.poor, aes(x= total, y = R, group = label_alt), color = "dark grey", arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
labs(color = "High-performing\ndual cues per µL", x = "Asexual & sexual iRBC", y = "RBC per µL") +
theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
guides(shape = FALSE)
dual_dc.pl <- dual_dc.pre +
geom_point(data = dual_dc.high2 %>% filter(row_number() %% 1000 ==0), aes(x = total, y = R, shape = label_alt), color = "#4575b4", size = 3) +
geom_path(data = dual_dc.high2, aes(x= total, y = R, group = label_alt), color = "#4575b4", size = 1, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme(legend.position = "none") +
guides(color = guide_legend(override.aes = list(size = 0.1)))
#——— co-infection static —————-#
# import in dynamics data
static_dyn.ls <- list.files(path = here("data/ci_static/"), pattern = "*.parquet", full.names = T)
static_dyn.ls <- lapply(static_dyn.ls, read_parquet)
# filter variable and transform
static_dyn.ls2 <- mclapply(static_dyn.ls, function(x){
x %>%
filter(variable == "I1" | variable == "Ig1" | variable == "I2" | variable == "Ig2" | variable == "R") %>%
mutate(id_alt = paste(id_1, id_2)) %>%
tidyr::pivot_wider(names_from = variable, values_from = value, id_cols = c(time, id_alt)) %>%
mutate(total1 = I1+Ig1, total2 = I2+Ig2)
})
static_dc.df <- do.call(rbind, static_dyn.ls2)
static_dc.df <- static_dc.df %>%
mutate(id_1 = gsub(" .*", "", id_alt),
id_2 = gsub(".* ", "", id_alt)) %>%
filter(id_1 != id_2)
#write_parquet(static_dc.df, here("data/disease_curve/static_dc.parquet"))
further processing
static_dc.df <- read_parquet(here("data/disease_curve/static_dc.parquet"))
# get winners and losers
## import in fitness
static_fitness.df <- read.csv(here("data/ci_static.csv"))
## get winner situation
static_fitness.df2 <- static_fitness.df %>%
filter(id_1 != id_2) %>%
mutate(winning_id = case_when(
fitness_difference > 0 ~ id_1,
fitness_difference< 0 ~ id_2
),
losing_id = case_when(
fitness_difference < 0 ~ id_1,
fitness_difference> 0 ~ id_2
))
# left join
static_dc.df2 <- static_dc.df %>%
left_join(select(static_fitness.df2, id_1, id_2, winning_id, losing_id, fitness_difference), by = c("id_1", "id_2"))
# get winner-loser difference in terms of I+Ig also filter out to onyl very strong fitness difference
static_dc.df3 <- static_dc.df2 %>%
mutate(total_diff = case_when(
fitness_difference > 0 ~ total1-total2,
fitness_difference< 0 ~ total2-total2
),
total_winner = case_when(
fitness_difference > 0 ~ total1,
fitness_difference< 0 ~ total2
),
total_loser = case_when(
fitness_difference > 0 ~ total2,
fitness_difference< 0 ~ total1
)) %>%
filter(abs(fitness_difference) > 0.5)
plot
static_dc.pl <- ggplot() +
geom_path(data = static_dc.df3, aes(x= total_winner, y = R, group = id_alt, color = "Winner"), alpha = 0.5, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
geom_path(data = static_dc.df3, aes(x= total_loser, y = R, group = id_alt, color = "Loser"),
alpha = 0.5,arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
labs(color = "Status", x = "Asexual & sexual iRBC", y = "RBC") +
scale_color_manual(values=c("Winner" = "#4575b4","Loser"= "#fc8d59")) +
theme_bw() +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1))
#———co-infection invasion —————# # get invasion dynamic
# get invasion df
invasion_fitness.df <- read.csv(here("data/ci_invasion.csv"))
# get cue range
ci_cue_range <- read.csv(here("data/cue_range_ci.csv"))
invasion_fitness.df2 <- invasion_fitness.df %>%
left_join(select(ci_cue_range, id, mut_cue = cue, mut_low = low, mut_high = high, mut_by = by), by = c("mut_id"= "id")) %>%
left_join(select(ci_cue_range, id, res_cue = cue, res_low = low, res_high = high, res_by = by), by = c("res_id"= "id"))
function to get dynamic
get_invasion_dyn <- function(df){
# get cues
mut_cue <- df$mut_cue
res_cue <- df$res_cue
# get info of cues (for co infection)
if(stringr::str_detect(mut_cue, "-i")){mut_cue = gsub("*-i", "1", mut_cue)}
if(stringr::str_detect(mut_cue, "-i", negate = T)){mut_cue = mut_cue}
if(stringr::str_detect(res_cue, "-i")){res_cue = gsub("*-i", "2", res_cue)}
if(stringr::str_detect(res_cue, "-i", negate = T)){res_cue = res_cue}
# get log
mut_log <- ifelse(stringr::str_detect(df$mut_id, "log"), "log10", "none")
res_log <- ifelse(stringr::str_detect(df$res_id, "log"), "log10", "none")
# get parameters
mut_par <- c(df$mut_var1_opt, df$mut_var2_opt, df$mut_var3_opt, df$mut_var4_opt)
res_par <- c(df$res_var1, df$res_var2, df$res_var3, df$res_var4)
# get cue range
mut_cue_range <- seq(df$mut_low, df$mut_high, by = df$mut_by)
res_cue_range <- seq(df$res_low, df$res_high, by = df$res_by)
# get dynamics of co infection
ci_dyn <- chabaudi_ci_clean(
parameters_cr_1 = mut_par,
parameters_cr_2 = res_par,
immunity = "tsukushi",
parameters = parameters_tsukushi,
cue_1 = mut_cue,
cue_2 = res_cue,
cue_range_1 = mut_cue_range,
cue_range_2 = res_cue_range,
log_cue_1 = mut_log,
log_cue_2 = res_log,
solver = "vode",
time_range = seq(0, 30, 0.001),
dyn = T)
# append label to all df
ci_dyn2 <- cbind(ci_dyn, mut_id = df$mut_id, res_id = df$res_id)
# write
write_parquet(ci_dyn2, paste0(here("data/ci_invasion_dyn/"), df$mut_id, "-", df$res_id, ".parquet"))
}
run dynamic funciton
# get function and parameters
source(here("functions/chabaudi_ci_clean.R"))
parameters_tsukushi <- c(R1 = 8.89*(10^6), # slightly higher
lambda = 3.7*(10^5),
mu = 0.025,
p = 8*(10^-6), # doubled form original
alpha = 1,
alphag = 2,
beta = 5.721,
mum = 48,
mug = 4,
I0 = 43.85965,
Ig0 = 0,
a = 150,
b = 100,
sp = 1,
psin = 16.69234,
psiw = 0.8431785,
phin = 0.03520591,
phiw = 550.842,
iota = 2.18*(10^6),
rho = 0.2627156)
# split
invasion.ls <- split(invasion_fitness.df2, seq(nrow(invasion_fitness.df2)))
# run function
mclapply(invasion.ls, get_invasion_dyn, mc.cores = 4)
process data
# import in invasion dynamics
invasion_dyn.ls <- list.files(path = here("data/ci_invasion_dyn"), pattern = "*.parquet", full.names = T)
invasion_dyn.ls <- lapply(invasion_dyn.ls, read_parquet)
# filter and so on
invasion_dyn.ls2 <- mclapply(invasion_dyn.ls[167:177], mc.cores = 4, function(x){
x2 <- x %>%
filter(variable == "I1" | variable == "Ig1" | variable == "I2" | variable == "Ig2" | variable == "R") %>%
mutate(id_alt = paste(mut_id, res_id)) %>%
select(id_alt, time, variable, value) %>%
tidyr::pivot_wider(names_from = variable, values_from = value, id_cols = c(time, id_alt)) %>%
mutate(total1 = I1+Ig1, total2 = I2+Ig2)
write_parquet(x2, paste0(here("data/disease_curve/ci_invasion/"), unique(x2$id_alt), "_dc.parquet"))
})
# fetch data
invasion_dyn.ls2 <- list.files(path = here("data/disease_curve/ci_invasion"), pattern = "*.parquet", full.names = T)
invasion_dyn.ls2 <- lapply(invasion_dyn.ls2, read_parquet)
invasion_dc.df <- do.call(rbind, invasion_dyn.ls2)
invasion_dc.df <- invasion_dc.df %>%
mutate(mut_id = gsub(" .*", "", id_alt),
res_id = gsub(".* ", "", id_alt)) %>%
filter(mut_id != res_id)
#write_parquet(invasion_dc.df, here("data/disease_curve/invasion_dc.parquet"))
further processing
invasion_dc.df <- read_parquet(here("data/disease_curve/invasion_dc.parquet"))
# get winners and losers
invasion_fitness.df <- read.csv(here("data/ci_invasion.csv"))
invasion_dc.df2 <- invasion_dc.df %>%
left_join(invasion_fitness.df, by = c("mut_id", "res_id")) %>%
mutate(
total_winner = case_when(
fitness> 0 ~ total1,
fitness< 0 ~ total2
),
total_loser = case_when(
fitness > 0 ~ total2,
fitness < 0 ~ total1
)) %>%
filter(abs(fitness) > 0.5)
plot
invasion_dc.pl <- ggplot() +
geom_path(data = invasion_dc.df2, aes(x= total_winner, y = R, group = id_alt, color = "Winner"), alpha = 0.5, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
geom_path(data = invasion_dc.df2, aes(x= total_loser, y = R, group = id_alt, color = "Loser"),
alpha = 0.5,arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
labs(color = "Status", x = "Asexual & sexual iRBC", y = "RBC") +
scale_color_manual(values=c("Winner" = "#4575b4","Loser"= "#fc8d59")) +
theme_bw() +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) %>%
theme(legend.position = "none")
#——— quantifying disease curve area ————# # function to calculate area between sets of points -> from https://stackoverflow.com/questions/3672260/area-covered-by-a-point-cloud-with-r
library(splancs)
Loading required package: sp
Spatial Point Pattern Analysis Code in S-Plus
Version 2 - Spatial and Space-Time analysis
Attaching package: ‘splancs’
The following object is masked from ‘package:dplyr’:
tribble
cha<-function(df){
x <- df$total
y <- df$R
chull(x,y)->i
return(areapl(cbind(x[i],y[i])))
}
library(splancs)
cha<-function(df){
x <- df$total
y <- df$R
chull(x,y)->i
return(areapl(cbind(x[i],y[i])))
}
loop to get area: single infection
# join with fitness
si_fitness.df <- si_fitness.df %>% left_join(ez_label, by = c("id" = "id_si"))
Error in is.data.frame(y) : object 'ez_label' not found
coinfection
# split
ci_dc_high.ls <- split(ci_dc.high2, ci_dc.high2$ez_label)
ci_dc_poor.ls <- split(ci_dc.poor, ci_dc.poor$label)
# run function to find area
ci_dc_high.area <- cbind.data.frame(area = as.numeric(lapply(ci_dc_high.ls, cha)), id_alt = names(lapply(ci_dc_high.ls, cha)), value = unique(ci_dc.high2$value))
ci_dc_poor.area <- cbind.data.frame(area = as.numeric(lapply(ci_dc_poor.ls, cha)), id_alt = names(lapply(ci_dc_poor.ls, cha)), value = unique(ci_dc.poor$value))
# edit and join
ci_dc_high.area2 <- ci_dc_high.area %>%
select(area, value) %>%
mutate(condition = "Co-infection")
ci_dc_poor.area2 <- ci_dc_poor.area %>%
select(area, value) %>%
mutate(condition = "Co-infection")
dual cue
# split
dual.dc <- read_parquet(here("data/disease_curve/dual_dc.parquet"))
dual_dc.ls <- split(dual.dc, dual.dc$label_alt)
# get area
dual_dc.area <- cbind.data.frame(area = as.numeric(lapply(dual_dc.ls, cha)), id_alt = names(lapply(dual_dc.ls, cha)))
# bind with fitness
dual_fitness.df <- dual_fitness.df %>% mutate(id_alt = paste(label, "+", label_b))
dual_dc.area2 <- dual_dc.area %>%
left_join(dual_fitness.df, by = "id_alt") %>%
select(area, value) %>%
mutate(condition = "Dual-cue") %>%
filter(value > 2)
dual_dc.area2
#—— get fitted scatter plot for all single infection, co infection, and dual cue ——–#
# plot
library("ggpmisc")
Loading required package: ggpp
Attaching package: ‘ggpp’
The following object is masked from ‘package:ggplot2’:
annotate
#——- plot together with disease curve ——–#
# single infection
si_vir.pl <- ggarrange(si_dc.pl, si_area.pl, align = "h", widths = c(1, 0.35))
Warning: Graphs cannot be horizontally aligned unless the axis parameter is set. Placing graphs unaligned.
# single infection
si_vir.pl <- ggarrange(si_dc.pl, si_area.pl, align = "h", widths = c(1, 0.35))
Warning: Graphs cannot be horizontally aligned unless the axis parameter is set. Placing graphs unaligned.
# co-infection
ci_vir.pl <- ggarrange(ci_dc.pl, ci_area.pl, align = "h", widths = c(1, 0.35))
Warning: Graphs cannot be horizontally aligned unless the axis parameter is set. Placing graphs unaligned.
#——— static area comparison ————-# # compute area
# import in dc dynamic and fitness
static_dc.df <- read_parquet(here("data/disease_curve/static_dc.parquet"))
static_fitness.df <- read.csv(here("data/ci_static.csv"))
# get winner and loser
static_dc.df4 <- static_dc.df %>%
left_join(select(static_fitness.df, id_1, id_2, fitness_difference), by = c("id_1", "id_2")) %>%
filter(id_1 != id_2) %>%
mutate(
total_winner = case_when(
fitness_difference > 0 ~ total1,
fitness_difference< 0 ~ total2
),
total_loser = case_when(
fitness_difference > 0 ~ total2,
fitness_difference< 0 ~ total1
))%>%
filter(abs(fitness_difference) > 0.5)
# split by winner and loser
static_dc.ls1 <- split(select(static_dc.df4, R, total = total_winner), static_dc.df4$id_alt)
static_dc.ls2 <- split(select(static_dc.df4, R, total = total_loser), static_dc.df4$id_alt)
# get area
static_win.area <- cbind.data.frame(area = as.numeric(lapply(static_dc.ls1, cha)), status = "Winner")
static_loser.area <- cbind.data.frame(area = as.numeric(lapply(static_dc.ls2, cha)), status = "Loser")
# pair
static.area <- cbind(select(static_win.area, Winner = area),
select(static_loser.area, Loser = area)) %>%
mutate(classification = ifelse(Winner>Loser, "Winner larger area", "Loser larger area"))
plot static

#——— invasion area comparison —————–# # get area
# import in dc dynamic and fitness
invasion_dc.df <- read_parquet(here("data/disease_curve/invasion_dc.parquet"))
invasion_fitness.df <- read.csv(here("data/ci_invasion.csv"))
invasion_dc.df4 <- invasion_dc.df %>%
left_join(invasion_fitness.df, by = c("mut_id", "res_id")) %>%
mutate(
total_winner = case_when(
fitness> 0 ~ total1,
fitness< 0 ~ total2
),
total_loser = case_when(
fitness > 0 ~ total2,
fitness < 0 ~ total1
)) %>%
filter(abs(fitness) > 0.5)
# split by winner and loser
invasion_dc.ls1 <- split(select(invasion_dc.df4, R, total = total_winner), invasion_dc.df4$id_alt)
invasion_dc.ls2 <- split(select(invasion_dc.df4, R, total = total_loser), invasion_dc.df4$id_alt)
# get area
invasion_win.area <- cbind.data.frame(area = as.numeric(lapply(invasion_dc.ls1, cha)), status = "Winner")
invasion_loser.area <- cbind.data.frame(area = as.numeric(lapply(invasion_dc.ls2, cha)), status = "Loser")
# pair
invasion.area <- cbind(select(invasion_win.area, Winner = area),
select(invasion_loser.area, Loser = area)) %>%
mutate(classification = ifelse(Winner>Loser, "Winner larger area", "Loser larger area"))
plot

#—— plot together ————-#
# pairwise comparison for static and invasive comeptition
heterocue_comp.pl <- ggarrange(static_area.pl, invasion_area.pl, ncol = 2, nrow = 1, align = "v")
# join inthe other disease curves
ggarrange(si_vir.pl, ci_vir.pl, dual_vir.pl, heterocue_comp.pl, ncol = 2, nrow = 2, labels = c("A", "B", "C", "D"), heights = c(1, 0.8))
ggsave(here("figures/plos-bio/virulence.tiff"), units = "px", width = 2250, height = 1600, scale = 1.9, dpi=300, bg = "white")

#====================================# # dual cue dynamics figure #===================================#
get dual dynamics
dual.dyn <- chabaudi_si_clean(
parameters_cr = c(4.446192033, 10.97518275, 1.38762817, 23.3059254, -3.452052371, -18.0070692, 39.66614226, -3.545193141, 18.78350799),
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 30, by = 1e-3),
cue_range = seq(6, 7, by = 1/500),
cue_range_b = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/500),
cue = "R",
cue_b = "I",
log_cue = "log10",
log_cue_b = "log10",
solver = "vode",
dyn = T
)
# filter out relevent dataframes
dual.dyn_f <- dual.dyn %>%
filter(variable %in% c("I", "Ig", "G", "R", "N", "W"))
# cr only
dual.dyn_cr <- dual.dyn %>% filter(variable == "cr")
plot
dual_I.plt <- ggplot() +
geom_line(data = dual.dyn_f %>% filter(variable == "I"), aes(x = time, y = value/(10^5)),
color = "#4575b4") +
geom_point(data = dual.dyn_f %>% filter(variable == "I" & row_number() %% 1000 == 0),
aes(x = time, y = value/(10^5)), size = 2, color = "#4575b4") +
geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
scale_y_continuous(name = "Conversion rate",
sec.axis = sec_axis(~.*10^5, name="Asexual iRBC per µL")) +
labs(x = "Time (days)") +
xlim(0, 20) +
theme_bw() +
theme(legend.position = "none")
dual_Ig.plt <-ggplot() +
geom_line(data = dual.dyn_f %>% filter(variable == "Ig"), aes(x = time, y = value/(10^5)),
color = "#4575b4") +
geom_point(data = dual.dyn_f %>% filter(variable == "Ig" & row_number() %% 1000 == 0),
aes(x = time, y = value/(10^5)), size = 2, color = "#4575b4") +
geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
scale_y_continuous(name = "Conversion rate",
sec.axis = sec_axis(~.*10^5, name="Sexual iRBC per µL")) +
labs(x = "Time (days)") +
xlim(0, 20) +
theme_bw() +
theme(legend.position = "none")
dual_G.plt <-ggplot() +
geom_line(data = dual.dyn_f %>% filter(variable == "G"), aes(x = time, y = value/(10^4)),
color = "#4575b4") +
geom_point(data = dual.dyn_f %>% filter(variable == "G" & row_number() %% 1000 == 0),
aes(x = time, y = value/(10^4)), size = 2, color = "#4575b4") +
geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
scale_y_continuous(name = "Conversion rate",
sec.axis = sec_axis(~.*10^4, name="Gametocyte per µL")) +
labs(x = "Time (days)") +
xlim(0, 20) +
theme_bw() +
theme(legend.position = "none")
dual_R.plt <-ggplot() +
geom_line(data = dual.dyn_f %>% filter(variable == "R"), aes(x = time, y = value/(10^7)),
color = "#4575b4") +
geom_point(data = dual.dyn_f %>% filter(variable == "R" & row_number() %% 1000 == 0),
aes(x = time, y = value/(10^7)), size = 2, color = "#4575b4") +
geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
scale_y_continuous(name = "Conversion rate",
sec.axis = sec_axis(~.*10^7, name="RBC per µL")) +
labs(x = "Time (days)") +
xlim(0, 20) +
theme_bw() +
theme(legend.position = "none")
dual_N.plt <-ggplot() +
geom_line(data = dual.dyn_f %>% filter(variable == "N"), aes(x = time, y = value*10),
color = "#4575b4") +
geom_point(data = dual.dyn_f %>% filter(variable == "N" & row_number() %% 1000 == 0),
aes(x = time, y = value*10), size = 2, color = "#4575b4") +
geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
scale_y_continuous(name = "Conversion rate",
sec.axis = sec_axis(~.*0.1, name="Indiscriminate immunity")) +
labs(x = "Time (days)") +
xlim(0, 20) +
theme_bw() +
theme(legend.position = "none")
dual_W.plt <-ggplot() +
geom_line(data = dual.dyn_f %>% filter(variable == "W"), aes(x = time, y = value*2),
color = "#4575b4") +
geom_point(data = dual.dyn_f %>% filter(variable == "W" & row_number() %% 1000 == 0),
aes(x = time, y = value*2), size = 2, color = "#4575b4") +
geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
scale_y_continuous(name = "Conversion rate",
sec.axis = sec_axis(~.*0.5, name="Targeted immunity")) +
labs(x = "Time (days)") +
xlim(0, 20) +
theme_bw() +
theme(legend.position = "none")
plot together
ggarrange(dual_I.plt, dual_Ig.plt, dual_G.plt, dual_R.plt, dual_N.plt, dual_W.plt, nrow = 3, ncol = 2, align = "hv", labels = c("A", "B", "C", "D", "E", "F"))
ggsave(here("figures/plos-bio/dual_dyn.tiff"), units = "px", width = 2250, height = 2000, scale = 1, dpi=300, bg = "white")
#======================================# # Single and co-infection verification #======================================# # single infection
# import in all single infection data
si_val.ls <- list.files(path = here("data/si_validation"), pattern = "*.csv", full.names = T)
si_val.df <- lapply(si_val.ls, read.csv)
si_val.df <- do.call(rbind, si_val.df)
# get max fitness from simulation. left join with si_opt
si_opt.df <- read.csv(here("data/si_opt.csv"))
# we can see that all of the randomly simulated models have a fitness value that is less than the optimized model
si_val.df2 <- select(si_val.df, V1, id) %>%
left_join(si_opt.df, by =c("id" = "id")) %>%
mutate(fitness_difference = fitness_20 - V1) %>%
left_join(select(ez_label, id_si, ez_label_si), by = c("id" = "id_si"))
plot
si_val.plt <- ggplot(data = si_val.df2, aes(x = fitness_difference)) +
geom_histogram(bins = 50) +
geom_vline(xintercept = 0, color = "#fc8d59") +
facet_wrap(~ez_label_si, scales = "free", ncol = 3) +
labs(x = "Optimized fitness - random fitness", y = "Frequency") +
theme_bw()
ci_val.plt <- ggplot(data = ci_val.df2, aes(x = V1)) +
geom_histogram(bins = 50) +
geom_vline(xintercept = 0, color = "#fc8d59") +
facet_wrap(~ez_label, scales = "free", ncol = 4) +
labs(x = "Fitness difference between\noptimized and random strain", y = "Frequency") +
theme_bw()
ggarrange(si_val.plt, ci_val.plt, align = "hv", labels = c("A", "B"), widths = c(3,4))
ggsave(here("figures/plos-bio/validation.tiff"), units = "px", width = 2250, height = 1300, scale = 1.6, dpi=300, bg = "white")
#=========================# # Monte carlo dynamics supplementary #=========================# # run code in report 16
mc_dyn_a <- ggplot() +
geom_line(data = reference.df, aes(x = time, y = cr)) +
geom_ribbon(data = diff.df, aes(x = time, ymin = cr_bot, ymax = cr_top), alpha = 0.5, fill = "#fc8d59") +
facet_wrap(~cue, ncol = 2) +
labs(x = "Time (days)", y = "Conversion rate") +
theme_bw()
# plot fitness timeseries. When if tiness lost? At the latter part
mc_dyn_b <- ggplot() +
geom_line(data = reference.df, aes(x = time, y = tau)) +
geom_ribbon(data = diff.df, aes(x = time, ymin = tau_bot, ymax = tau_top), alpha = 0.5, fill = "#fc8d59") +
facet_wrap(~cue, ncol = 2) +
labs(x = "Time (days)", y = "Transmission potential") +
theme_bw()
ggarrange(mc_dyn_a, mc_dyn_b, ncol = 1, align = "v", labels = c("A", "B"))
Warning: Removed 1 row containing missing values (`geom_line()`).
ggsave(here("figures/plos-bio/MC_dyn.tiff"), units = "px", width = 2250, height = 2000, scale = 1.3, dpi=300, bg = "white")

---
title: "R Notebook"
output: html_notebook
---
```{r}
library(dplyr)
library(ggplot2)
library(forcats)
library(here)
library(deSolve)
library(crone)
library(optimParallel)
library(doParallel)
library(doRNG)
library(arrow)
library(stringr)
library(parallel)
library(ggpubr)
```

Notebook for plotting all of the figures for PloS Biology manuscript submission

Guidelines: taken from https://journals.plos.org/plosbiology/s/figures#loc-figure-file-requirements
1. format: eps
2. max file size: 10 MB
3. text size: Arial, Times, or Symbol font only in 8-12 point
2. figure size: Width: 789 – 2250 pixels (at 300 dpi). Height maximum: 2625 pixels (at 300 dpi).


#=========================================#
figure 1: best single and co-infection cue
#=========================================#
Figure displaying the reaction norms of best single and co-infection.

#------- optimal cue reaction norm -----------#
# read data
```{r}
# single infection dynamics, reaction norms, and rugs
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet")) 
si_rn.df <- read_parquet(here("data/si_dyn/si_rn.parquet"))
si_rug.df <- read_parquet(here("data/si_dyn/si_rug.parquet"))

# co-infection dynamics, reaction norms, and rugs
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))
ci_rn.df <- read_parquet(here("data/ci_dyn/ci_rn.parquet"))
ci_rug.df <- read_parquet(here("data/ci_dyn/ci_rug.parquet"))
```

# process data for reaction norm
```{r}
# import labelling scheme
ez_label <- read.csv(here("data/ez_label.csv"))

# get si_label with ci cue range
si_ci_rug.df <- ci_rug.df %>% 
  mutate(label_si = case_when(
    label %in% c("I", "I1+I2") ~ "I",
    label %in% c("I log","I1+I2 log") ~ "I log",
    label %in% c("Ig", "Ig1+Ig2") ~ "Ig",
    label %in% c("Ig log") ~ "Ig log",
    label %in% c("sum", "I+Ig") ~ "I+Ig",
    label %in% c("sum log", "I+Ig log") ~ "I+Ig log",
    label == "R" ~ "R",
    label == "R log" ~ "R log",
    label %in% c("G", "G1+G2") ~ "G",
    label == "G log" ~ "G log"
  )) 

# get limit for si_rug
si_rug_lim.df <- si_rug.df %>% 
  filter(time <= 20) %>%
  group_by(label)%>% 
  summarise(min = min(value, na.rm = T)*0.9,
         max = max(value, na.rm = T)*1.1) %>% 
  select(label_si = label, min_si = min, max_si = max)

# filter to restriction conversion rate reaction norm range to cue ranges that appear in rug
## change to Inf/-inf to NA. Note that I am first joining with si rug lim to check which limit is larger, We will go with the cue range that has the largest span
ci_rug_lim.df <- si_ci_rug.df %>% 
  group_by(label) %>% 
  mutate(min = min(value, na.rm = T)*0.9,
         max = max(value, na.rm = T)*1.1) %>% 
  distinct(label, .keep_all = T) %>% 
  select(label, label_si, min, max)

rug_lim.final <- ci_rug_lim.df %>% left_join(si_rug_lim.df, by = "label_si") %>% 
  mutate(final_min = min(min, min_si),
         final_max = max(max, max_si))

# get second rug_lim.final for single infection
rug_lim.final2 <- rug_lim.final %>% 
  group_by(label_si) %>% 
  mutate(final_min = min(final_min, na.rm = T),
         final_max = max(final_max, na.rm = T))

# filter ci_rn by limit
ci_rn.df2 <- ci_rn.df %>% 
  left_join(rug_lim.final, by  = "label") %>% 
  group_by(label) %>% 
  filter(cue_range <= final_max & cue_range >= final_min)
```

# match single infection rn with coinfection 
```{r}
# get ci label to si rug and filter by limit
si_rn.df2 <- left_join(si_rn.df, select(ez_label, label_si, label_ci), by = c("label" = "label_si")) %>% 
  left_join(rug_lim.final, by  = c("label_ci" = "label")) %>% 
  group_by(label_ci) %>% 
  filter(cue_range <= final_max & cue_range >= final_min) %>% 
  select(cue_range, cr, label_ci, label_si)

# get ci label to si rug
si_rug.df2 <- select(si_rug.df, value, label_si = label) 
```

# plot reaction norm
```{r}
# join with ezlabel
ci_rn.df3 <- ci_rn.df2 %>% left_join(ez_label, by = c("label" = "label_ci"))
si_rn.df3 <- si_rn.df2 %>% left_join(ez_label, by = "label_ci")
ci_rug.df3 <- ci_rug.df %>% left_join(ez_label, by = c("label" = "label_ci"))
si_rug.df3 <- si_rug.df2 %>% left_join(ez_label, by = "label_si")
```


```{r}
# redo order of cues
ci_rn.df3$ez_label <- factor(ci_rn.df3$ez_label, 
                              levels = c("Asexual iRBC", "Asexual iRBC log10",
                                         "Total asexual iRBC", "Total asexual\niRBC log10",
                                         "Sexual iRBC", "Sexual iRBC log10",
                                         "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
                                         "Total iRBC", "Total iRBC log10",
                                         "Gametocyte", "Gametocyte log10",
                                         "Total gametocyte", "Total sexual iRBC",
                                         "RBC", "RBC log10")) 

si_rn.df3$ez_label <- factor(si_rn.df3$ez_label, 
                              levels = c("Asexual iRBC", "Asexual iRBC log10",
                                         "Total asexual iRBC", "Total asexual\niRBC log10",
                                         "Sexual iRBC", "Sexual iRBC log10",
                                         "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
                                         "Total iRBC", "Total iRBC log10",
                                         "Gametocyte", "Gametocyte log10",
                                         "Total gametocyte", "Total sexual iRBC",
                                         "RBC", "RBC log10"))

ci_rug.df3$ez_label <- factor(ci_rug.df3$ez_label, 
                              levels = c("Asexual iRBC", "Asexual iRBC log10",
                                         "Total asexual iRBC", "Total asexual\niRBC log10",
                                         "Sexual iRBC", "Sexual iRBC log10",
                                         "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
                                         "Total iRBC", "Total iRBC log10",
                                         "Gametocyte", "Gametocyte log10",
                                         "Total gametocyte", "Total sexual iRBC",
                                         "RBC", "RBC log10"))

si_rug.df3$ez_label <- factor(si_rug.df3$ez_label, 
                              levels = c("Asexual iRBC", "Asexual iRBC log10",
                                         "Total asexual iRBC", "Total asexual\niRBC log10",
                                         "Sexual iRBC", "Sexual iRBC log10",
                                         "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
                                         "Total iRBC", "Total iRBC log10",
                                         "Gametocyte", "Gametocyte log10",
                                         "Total gametocyte", "Total sexual iRBC",
                                         "RBC", "RBC log10"))
# plot
opt_cue_pl.B <- ggplot() +
  geom_line(data = ci_rn.df3, aes(x = cue_range, y = cr, color = "Co-infection")) +
  geom_point(data = ci_rn.df3 %>% 
    group_by(label) %>% 
    mutate(ten_th = round(n()/10)) %>% 
    filter(row_number() %% ten_th == 0), aes(x = cue_range, y = cr, color = "Co-infection", shape = "Co-infection"), size = 2) +
  geom_line(data = si_rn.df3, aes(x = cue_range, y = cr, color = "Single infection")) +
  geom_point(data = si_rn.df3 %>% 
    group_by(label_ci) %>% 
    mutate(ten_th = round(n()/10)) %>% 
    filter(row_number() %% ten_th == 0), aes(x = cue_range, y = cr, color = "Single infection", shape = "Single infection"), size = 2) +
  geom_rug(data = ci_rug.df3, aes(x = value), color = "#4575b4", sides = "t", length = unit(0.1, "npc")) +
  geom_rug(data = si_rug.df3, aes(x = value), color = "#fc8d59", sides = "b", length = unit(0.1, "npc")) +
  facet_wrap(~ez_label, scales = "free_x", ncol = 2) +
  ylim(-0.3, 1.3) +
  theme_bw() +
  labs(y = "Conversion rate", x = "Cue range", color = "Model", shape = "Model") +
  scale_x_continuous(labels = function(x) format(x, scientific = T),
                     guide = guide_axis(check.overlap = TRUE)) + 
                    theme(axis.text.x = element_text(size = 7),
                          legend.position = "bottom")  +
  scale_color_manual(values=c( "#4575b4", "#fc8d59")) +
  theme(strip.text.x = element_text(margin = margin(b = 0.5, t = 0.5)))
```

#---------- plot best fitness x vs y plot --------------#
# import in data
```{r}
# single infection dynamics
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet")) 

# co-infection dynamics
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))

ez_label <- read.csv(here("data/ez_label.csv"))
```

# process for final 20 days fitness
```{r}
# get single infection maximum tau_cum for 20 days
si_fitness.df <- si_dyn.df %>% 
  filter(variable == "tau_cum" & time == 20)

# get co-infection maximum tau_cum for 20 days
ci_fitness.df <- ci_dyn.df %>% 
  filter(variable == "tau_cum1" & time == 20)

# join together two dataframes and add labels
si_ci_fitness.df <- select(ci_fitness.df, fitness_ci = value, label_ci = label) %>% 
  left_join(ez_label, by = "label_ci") %>% 
  left_join(select(si_fitness.df, fitness_si = value, id_si = id), by = "id_si")
```

# plot
```{r}
opt_cue_pl.A <- ggplot() +
  geom_point(data = si_ci_fitness.df, aes(x = fitness_si, y = fitness_ci, color = ez_label_si, shape = ez_label_si), size = 3.5) +
  ggrepel::geom_label_repel(data = si_ci_fitness.df, aes(label = ez_label, x = fitness_si, y = fitness_ci),
                            fill = "white",xlim = c(-Inf, Inf), ylim = c(NA, NA)) +
  labs(x = "Maximum single infection fitness", y = "Maximum Co-infection fitness (per strain)",
       color = "Single infection cue", shape = "Single infection cue") +
  scale_shape_manual(values = 15:24) +
  lims(x = c(8, 10.5)) +
  geom_vline(xintercept = 9.2, linetype = "dashed", alpha = 0.5) + # si good cue cutoff
  geom_hline(yintercept = 2.25, linetype = "dashed", alpha = 0.5) + # ci good cue cutoff
  theme_bw() +
  coord_cartesian(clip = "off")
```

# plot reaction norm and fitness value together
```{r}
ggarrange(opt_cue_pl.A, opt_cue_pl.B, widths = c(0.6, 0.4), align = "h", labels = c("A", "B"))
ggsave(units = "px", dpi = 300, width = 2000, height = 1300, filename = here("figures/plos-bio/rn_fitness.tiff"), bg = "white", scale = 2.1)
```

#===========================================================#
# Demographic stochasticity
#===========================================================#
#---------- plot heat map---------------#
# import in all fitness files
```{r}
file_ls <- list.files(path = here("data/MC_partitioned/"), pattern = "*.csv", full.names = T)
name_ls <- list.files(path = here("data/MC_partitioned/"), pattern = "*.csv")
name_ls <- gsub("*.csv", "", name_ls)

# 60, which is about right
length(file_ls)

# read in files
fitness.ls <- lapply(file_ls, read.csv)

# assign unique ID
fitness.ls <- mapply(cbind, fitness.ls, "ID" = name_ls, SIMPLIFY = F)
```

# process data
```{r}
# get metainfo from ID
fitness.ls2 <- mclapply(fitness.ls, function(x){
  id_col <- x$ID
  # string split to extract all info
  cue <- unlist(str_split(unique(id_col), pattern = "_"))[[3]]
  log <- unlist(str_split(unique(id_col), pattern = "_"))[[4]]
  rand_var <- unlist(str_split(unique(id_col), pattern = "_"))[[5]]
  
  # get mean
  mean_fitness <- mean(x$max_fitness)
  # get sd
  sd_fitness <- sd(x$max_fitness)
  
  # bind results
  res <- cbind(x, cue= cue, log = log, rand_var = rand_var, mean_fitness = mean_fitness, sd_fitness = sd_fitness)
  return(res)
})
```

# Get reference data
```{r}
reference_ls <- list.files(path = here("data/MC2"), pattern = "*.csv", full.names = T)
reference_name.ls <- gsub("*.csv", "", list.files(path = here("data/MC2/"), pattern = "*.csv"))

# read in the files
reference.ls <- lapply(reference_ls, read.csv)

# assign unique ID
reference.ls <- mapply(cbind, reference.ls, "ID" = reference_name.ls, SIMPLIFY = F)

# get meta data
reference.ls2 <- mclapply(reference.ls, function(x){
  id_col <- x$ID
  # string split to extract all info
  cue <- unlist(str_split(unique(id_col), pattern = "_"))[[2]]
  # get log
  third_col <- unlist(str_split(unique(id_col), pattern = "_"))[[3]]
  log <- ifelse(third_col == "log", "log10", "none")
  
  # get mean
  mean_fitness <- mean(x$max_fitness)
  
  # get sd
  sd_fitness <- sd(x$max_fitness)
  
  # bind results
  res <- cbind(x, cue= cue, log = log, rand_var = "all", ref_mean_fitness = mean_fitness, ref_sd_fitness = sd_fitness)
  return(res)
})
```

# combine MC partitioned and reference df
```{r}
# get unique column values for each cue, log, and rand_var combo
fitness.ls3 <- do.call(rbind, fitness.ls2)
fitness.ls3 <- fitness.ls3 %>% dplyr::distinct(ID, .keep_all = T)

# repeat with reference
reference.ls3 <- do.call(rbind, reference.ls2)
reference.ls3 <- reference.ls3 %>% dplyr::distinct(ID, .keep_all = T)

# combine!
ref_fit.df <- left_join(fitness.ls3, reference.ls3, by = c("cue" = "cue", "log"= "log"))
```

# compute proportion fitness and variation
```{r}
ref_fit.df2 <- ref_fit.df %>% 
  mutate(p_sd = sd_fitness/ref_sd_fitness,
         p_mean = ref_mean_fitness/mean_fitness,
         cue_log = paste0(cue, "_", log),
         label = case_when(
           cue == "G" ~ "Gametocyte",
           cue == "I" ~ "Asexual iRBC",
           cue == "I+Ig" ~ "Asexual&sexual\niRBC",
           cue == "Ig" ~ "Sexual iRBC",
           cue == "R" ~ "RBC"
           ),
         parameter = case_when(
           rand_var.x == "rho" ~ "ρ",
           rand_var.x == "phin" ~ "ϕn",
           rand_var.x == "phiw"~ "ϕw",
           rand_var.x == "psin" ~ "ψn",
           rand_var.x == "psiw" ~ "ψw",
           rand_var.x == "beta" ~ "β"
         ))
```

# plot!
```{r}
# variation
mc_b <- ggplot() +
  geom_tile(data = ref_fit.df2 , aes(x = label, y = parameter, fill = p_sd)) +
  facet_wrap(~log) +
  theme_bw() +
  viridis::scale_fill_viridis() +
  labs(x = "Cue", y = "Parameter randomized", fill = expression(frac(sd("1 parameter randomized"), sd("all parameters randomized")))) +
  theme(legend.position="top",
        axis.text.x = element_text(angle = 45, hjust=1))

# mean fitness
mc_c <- ggplot() +
  geom_tile(data = ref_fit.df2 , aes(x = label, y = parameter, fill = p_mean)) +
  facet_wrap(~log) +
  theme_bw() +
  viridis::scale_fill_viridis() +
  labs(x = "Cue", y = "Parameter randomized", fill = expression(frac(Mean("all parameters randomized"), Mean("1 parameter randomized")))) +
  theme(legend.position="top",
        axis.text.x = element_text(angle = 45, hjust=1))

mc_partition <- ggarrange(mc_b, mc_c, ncol = 1)

```

#-------------- get violine plot of variation in fitness --------------------#
# read MC data
```{r}
# read in dymamics
mc_G_log.dyn <- read_parquet(here("data/MC2/mc_G_log_dyn.parquet"))
mc_G.dyn <- read_parquet(here("data/MC2/mc_G_dyn.parquet"))
mc_R_log.dyn <- read_parquet(here("data/MC2/mc_R_log_dyn.parquet"))
mc_R.dyn <- read_parquet(here("data/MC2/mc_R_dyn.parquet"))
mc_I_log.dyn <- read_parquet(here("data/MC2/mc_I_log_dyn.parquet"))
mc_I.dyn <- read_parquet(here("data/MC2/mc_I_dyn.parquet"))
mc_Ig_log.dyn <- read_parquet(here("data/MC2/mc_Ig_log_dyn.parquet"))
mc_Ig.dyn <- read_parquet(here("data/MC2/mc_Ig_dyn.parquet"))
mc_I_Ig_log.dyn <- read_parquet(here("data/MC2/mc_I+Ig_log_dyn.parquet"))
mc_I_Ig.dyn <- read_parquet(here("data/MC2/mc_I+Ig_dyn.parquet"))

# read in fitness
mc_G_log.fitness <- read.csv(here("data/MC2/mc_G_log_fitness.csv"))
mc_G.fitness <- read.csv(here("data/MC2/mc_G_fitness.csv"))
mc_R_log.fitness <- read.csv(here("data/MC2/mc_R_log_fitness.csv"))
mc_R.fitness <- read.csv(here("data/MC2/mc_R_fitness.csv"))
mc_I_log.fitness <- read.csv(here("data/MC2/mc_I_log_fitness.csv"))
mc_I.fitness <- read.csv(here("data/MC2/mc_I_fitness.csv"))
mc_Ig_log.fitness <- read.csv(here("data/MC2/mc_Ig_log_fitness.csv"))
mc_Ig.fitness <- read.csv(here("data/MC2/mc_Ig_fitness.csv"))
mc_I_Ig_log.fitness <- read.csv(here("data/MC2/mc_I+Ig_log_fitness.csv"))
mc_I_Ig.fitness <- read.csv(here("data/MC2/mc_I+Ig_fitness.csv"))
```

# examine variation
```{r}
# plot fitness vs iteration
fitness.df <- rbind(
  cbind(mc_G_log.fitness, id = "Gametocyte\nlog10"),
  cbind(mc_G.fitness, id = "Gametocyte"),
  cbind(mc_R_log.fitness, id = "RBC log10"),
  cbind(mc_R.fitness, id = "RBC"),
  cbind(mc_I_log.fitness, id = "Asexual iRBC\nlog10"),
  cbind(mc_I.fitness, id = "Asexual iRBC"),
  cbind(mc_Ig_log.fitness, id = "Sexual iRBC\nlog10"),
  cbind(mc_Ig.fitness, id = "Sexual iRBC"),
  cbind(mc_I_Ig_log.fitness, id = "Asexual&sexual iRBC\nlog10"),
  cbind(mc_I_Ig.fitness, id = "Asexual&sexual\niRBC")
)

# quantify variance and mean
fitness_var.df <- fitness.df %>% 
  dplyr::group_by(id) %>% 
  dplyr::summarise(median = median(max_fitness)) %>% 
  dplyr::mutate(id = forcats::fct_reorder(id, median))
```

# plot violin with difference in deterministic model fitness and mean model fitness
```{r}
# get deterministic df
det.df <- data.frame(fitness_var.df, `Maximum fitness` =  c(8.49777, 9.494991,8.854682,9.573291,8.58856,9.561373,8.23991,8.181604,8.569285,9.618812)) %>% 
  dplyr::mutate(id = forcats::fct_reorder(id, median)) %>% 
  tidyr::pivot_longer(-id) %>% 
  mutate(classification = ifelse(name == "Maximum.fitness", "Optimal single infection", "Median Monte Carlo"))

mc_a <- ggplot() +
  geom_point(data = det.df, aes(y = id, x = value, shape = classification, color = classification), size = 3) +
  geom_violin(data = fitness.df, aes(y = id, x = max_fitness), fill = "transparent") +
  labs(x = "Fitness", y = "Cue", color = "Fitness", shape = "Fitness") +
  theme_bw() +
  theme(legend.position = "bottom") +
  scale_color_manual(values=c( "#4575b4", "#fc8d59"))
```

#-------------- plot together-----------------------#
```{r}
# arrange the heat map
ggarrange(mc_a, mc_partition, heights = c(0.4, 0.6), ncol = 1, labels = c("A", "B"))

# save
ggsave(units = "px", dpi = 300, width = 1000, height = 2000, filename = here("figures/plos-bio/MC.tiff"), bg = "white", scale = 1.8)
```

#=========================================================#
# time series conversion rate for single and co-infection
#=========================================================#
#--------- single infection conversion rate heat map--------------#
# process info for single infection
```{r}
# get fintess
si_cr.df <- si_dyn.df %>% 
  filter(time <= 20 & variable == "cr")

# good cue bad cue
si_cue.dv <- si_fitness.df %>% 
  mutate(classification = case_when(
    value > 9.2 ~ "Good performing",
    value <= 9.2 ~ "Poor performing"
  ))

# join with classificaiton
si_cr.df2 <- si_cr.df %>% 
  left_join(select(si_cue.dv, id, classification, fitness_si = value), by = "id") %>% 
  left_join(ez_label, by = c("id" = "id_si"))

# split into top erforming and poor-performing cues
si_cr.high <- si_cr.df2 %>% filter(classification == "Good performing")
si_cr.poor <- si_cr.df2 %>% filter(classification == "Poor performing")

si_cue.dv
```

# plot single infection conversion rate heatmap
```{r}
# plot poor performing
si_cr.pl1 <- ggplot() +
  geom_raster(data = si_cr.poor, aes(x = time, y = forcats::fct_reorder(ez_label_si, fitness_si), fill = value)) +
  labs(x = "Time (days)", y = "Poor performing\nsingle infection cues", fill = "Conversion rate") +
  scale_fill_viridis_c(limits = c(0, 1)) +
  xlim(1, 20) +
  theme_bw()

# plot high perfomring
si_cr.pl2 <- ggplot() +
  geom_raster(data = si_cr.high, aes(x = time, y = forcats::fct_reorder(ez_label_si, fitness_si), fill = value)) +
  labs(x = "", y = "Good performing\nsingle infection cues", fill = "Conversion rate") +
  scale_fill_viridis_c(limits = c(0, 1)) +
  xlim(1, 20) +
  theme_bw()
```

# -----------------co-infection conversion rate heatmap-----------#
# plot co-infeciton convesion rate heatmap
```{r}
# get fitess
ci_cr.df <- ci_dyn.df %>% 
  filter(time <= 20 & variable == "cr_1")

# good cue bad cue
ci_cue.dv <- ci_fitness.df %>% 
  mutate(classification = case_when(
    value > 2.25 ~ "Good performing",
    value <= 2.25 ~ "Poor performing"
  ))

# join with classificaiton
ci_cr.df2 <- ci_cr.df %>% 
  left_join(select(ci_cue.dv, label, classification, fitness_ci = value), by = "label") %>% 
  left_join(ez_label, by = c("label" = "label_ci"))

# split into top erforming and poor-performing cues
ci_cr.high <- ci_cr.df2 %>% filter(classification == "Good performing")
ci_cr.poor <- ci_cr.df2 %>% filter(classification == "Poor performing")
```


# plot
```{r}
# plot poor performing
ci_cr.pl1 <- ggplot() +
  geom_raster(data = ci_cr.poor, aes(x = time, y = forcats::fct_reorder(ez_label, fitness_ci), fill = value), alpha = 1) +
  labs(x = "Time (days)", y = "Poor performing\nco-infection cues", fill = "Conversion rate") +
  scale_fill_viridis_c(limits = c(0, 1)) +
  xlim(1, 20) +
  theme_bw()

# plot high perfomring
ci_cr.pl2 <- ggplot() +
  geom_raster(data = ci_cr.high, aes(x = time, y = forcats::fct_reorder(ez_label, fitness_ci), fill = value), alpha = 1) +
  labs(x = "", y = "Good performing\nco-infection cues", fill = "Conversion rate") +
  scale_fill_viridis_c(limits = c(0, 1)) +
  xlim(1, 20) +
  theme_bw()
```

#--------- assemble final figure --------------#
```{r}
si_cr.pl <- ggarrange(si_cr.pl2, si_cr.pl1, common.legend = T, ncol = 1, nrow = 2)
ci_cr.pl <- ggarrange(ci_cr.pl2, ci_cr.pl1, common.legend = T, ncol = 1, nrow = 2)

ggarrange(si_cr.pl, ci_cr.pl, labels = c("A", "B"), ncol = 2, common.legend = T)
ggsave(units = "px", dpi = 300, width = 2000, height = 1000, filename = here("figures/plos-bio/time_cr.tiff"), bg = "white", scale = 1.7)
```

#--------------------------------------------#
# dual cue optimization figure
#--------------------------------------------#
```{r}
source(here("functions/chabaudi_si_clean_high.R"))
source(here("functions/chabaudi_si_clean.R"))
source(here("functions/par_to_hm_te.R"))
```

#---------- plotting fitness of dual vs single cue opt ---------#
# import in previous data
```{r}
# dual cue fitness
dual_fitness.df <- read.csv(here("data/dual_cue_opt4/dual_cue_fitness_20.csv"))
## make label and filter out very low fitness
dual_fitness.df <- dual_fitness.df %>% 
  mutate(temp_label = gsub("log", "log10", label),
         temp_label_b = gsub("log", "log10", label_b),
         label_final = paste0(temp_label, "+", temp_label_b)) %>% 
  filter(value > 2)

# get single cue fitness
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet")) 
si_fitness.df <- si_dyn.df %>% 
  filter(variable == "tau_cum" & time == 20)

# join si and dual cue
dual_si_fitness.df <- dual_fitness.df %>% 
  left_join(select(si_fitness.df, id, si_fitness = value), by = "id") %>% 
  left_join(select(si_fitness.df, id_b = id, si_fitness_b = value), by = "id_b") %>% 
  mutate(si_fitness_max = ifelse(si_fitness > si_fitness_b, si_fitness, si_fitness_b),
         dual_label = gsub("log", "log10", paste(label, "+", label_b)))
```

# plot
```{r}
dual_si_fitness.pl <- ggplot() +
  geom_point(data = dual_si_fitness.df, aes(y = forcats::fct_reorder(dual_label, value), x = value, color = "Dual cue", shape = "Dual cue"), size = 3, alpha = 0.7) +
  geom_point(data = dual_si_fitness.df, aes(y = forcats::fct_reorder(dual_label, value), x = si_fitness_max, color= "Best single cue", shape= "Best single cue"), size = 3, alpha = 0.7) +
  labs(x = "Fitness", y = "Dual cue", color = "Cue", shape = "Cue") +
  scale_color_manual(values=c("#fc8d59", "#4575b4")) +
  geom_vline(xintercept = 9.883602) +
  geom_text(aes(x=9.883602, label="\nIdeal fitness (df=9)", y = "G + R log10"), angle=90) +
  theme_bw()
```


#----------- time series conversion rate -------------#
# dynamics simulation of high parameter cues (these serve as reference points)
```{r}
# best dual cue combo
dual.cr <- chabaudi_si_clean(
  parameters_cr = c(4.446192033,	10.97518275,	1.38762817,	23.3059254,	-3.452052371,	-18.0070692,	39.66614226,	-3.545193141,	18.78350799),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 20, by = 1e-3),
  cue_range =  seq(6, 7, by = 1/500),
  cue_range_b = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/500),
  cue = "R",
  cue_b = "I",
  log_cue = "log10",
  log_cue_b = "log10",
  solver = "vode",
  dyn = T
)

# when time is used as a cue (high parameter)
time.cr <- chabaudi_si_clean_high(
  parameters_cr = c(9.154314,  -7.570829, -22.506638 ,  3.382405 ,-13.453519 ,-17.011485  , 3.678181, -12.851895 ,-26.115158),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 20, by = 1e-3),
  cue_range =  seq(0, 20, by = 1e-3),
  cue = "t",
  solver = "vode",
  dyn = T)

# when asexual iRBC is used as a cue (high flexibility)
I_high.cr <- chabaudi_si_clean_high(
  parameters_cr = c(1.296675,  3.544034 , 4.907484,  2.174249, -3.238309 ,-5.181614 ,-1.645072 , 1.834302 , 1.581011),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 20, by = 1e-3),
  cue_range =  seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000),
  cue = "I",
  log_cue = "log10",
  solver = "vode",
  dyn = T)

# when asexual iRBC is used as a cue (normal flexibility)
I.cr <- chabaudi_si_clean(
  parameters_cr = c(5.463558,	2.383948,	-17.757281,	4.571835),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 20, by = 1e-3),
  cue_range =  seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000),
  cue = "I",
  log_cue = "log10",
  solver = "vode",
  dyn = T)

# process 
I_high.cr2 <- I_high.cr %>% filter(variable == "cr") %>% mutate(label_new = "I log10 (df=9)") %>% select(-variable)

I.cr2 <- I.cr %>% filter(variable == "cr") %>% mutate(label_new = "I log10 (df=3)") %>% select(-variable)

time_high.cr2 <- time.cr %>% filter(variable == "cr") %>% mutate(label_new = "Ideal (df=9)") %>% select(-variable)

dual.cr2 <- dual.cr %>% filter(variable == "cr") %>% mutate(label_new = "R log10 + I log10 (df=9)") %>% select(-variable)

# combine
dual_cr.df <- rbind(I_high.cr2, I.cr2, time_high.cr2, dual.cr2)
```

# plot
```{r}
dual_cr.pl <- ggplot() +
  geom_line(data = dual_cr.df, aes(color = label_new, x = time, y = value), size = 1) +
  geom_point(data = dual_cr.df %>% filter(time%%1 == 0), aes(color = label_new, x = time, y = value, shape = label_new), size = 3) +
  labs(x = "Time (days)", y = "Conversion rate", color = "Cue", shape = "Cue") +
  xlim(0, 20) +
  scale_color_manual(values = c("#fc8d59","#fdcb44","black", "#4575b4")) +
  theme_bw() +
  theme(legend.position="bottom") +
  guides(color = guide_legend(nrow = 2, byrow = TRUE))
```

#------------ reaction norm heatmap of R log10 + I log10 ------------#
# process data
```{r}
# make heatmap df
R_I.hm <- par_to_hm_te(par = c(4.446192033,	10.97518275,	1.38762817,	23.3059254,	-3.452052371,	-18.0070692,	39.66614226,	-3.545193141,	18.78350799),
             cue_range = seq(6,	7, length.out = 500),
             cue_range_b = seq(0,	6.77815125, length.out = 500))

# process dynamics
R_I.dyn <- dual.cr %>% 
  tidyr::pivot_wider(names_from = variable, values_from = value) %>% 
  mutate(log_R = log10(R),
         log_I = log10(I))

# examine sexual iRBC as well
R_Ig.dyn <- dual.cr %>% 
  tidyr::pivot_wider(names_from = variable, values_from = value) %>% 
  mutate(log_R = log10(R),
         log_Ig = log10(Ig))
max(R_Ig.dyn$Ig) 
```

# plot
```{r}
dual_rn.pl <- ggplot() +
  geom_raster(data = R_I.hm, aes(x = cue_range_b, y = cue_range, fill = cr)) +
  scale_fill_viridis_c() +
  geom_path(data = R_I.dyn, aes(x = log_I, y = log_R), color = "white", arrow = arrow(angle = 30, length = unit(0.1, "inches"))) +
  geom_point(data = R_I.dyn %>% filter(row_number() %% 1000 == 1 & time <= 20), aes(x = log_I, y = log_R), color = "white") +
  xlim(0.99*min(hablar::s(R_I.dyn$log_I), na.rm = T), 1.01* max(hablar::s(R_I.dyn$log_I), na.rm = T)) +
  ylim(0.99*min(hablar::s(R_I.dyn$log_R), na.rm = T),1.01* max(hablar::s(R_I.dyn$log_R), na.rm = T)) +
  labs(y = "RBC log10", x = "Asexual iRBC log10", fill = "Conversion rate") +
  theme_dark()

# just testing for sexual iRBC vs RBC
ggplot() +
  geom_path(data = R_Ig.dyn, aes(x = Ig, y = log_R), color = "white", arrow = arrow(angle = 30, length = unit(0.1, "inches"))) +
  geom_point(data = R_Ig.dyn %>% filter(row_number() %% 1000 == 1 & time <= 20), aes(x = Ig, y = log_R), color = "white") +
  scale_x_continuous(trans = "log10") +
  xlim(0, 200000) +
  labs(y = "RBC log10", x = "Sexual iRBC log10", fill = "Conversion rate") +
  theme_dark()
```

# save figure for poster
```{r}
dual_rn.pl2 <- ggplot() +
  geom_raster(data = R_I.hm, aes(x = cue_range_b, y = cue_range, fill = cr)) +
  scale_fill_viridis_c() +
  geom_path(data = R_I.dyn, aes(x = log_I, y = log_R), color = "white", arrow = arrow(angle = 30, length = unit(0.1, "inches"))) +
  geom_point(data = R_I.dyn %>% filter(row_number() %% 1000 == 1 & time <= 20), aes(x = log_I, y = log_R), color = "white") +
  xlim(0.99*min(hablar::s(R_I.dyn$log_I), na.rm = T), 1.01* max(hablar::s(R_I.dyn$log_I), na.rm = T)) +
  ylim(0.99*min(hablar::s(R_I.dyn$log_R), na.rm = T),1.01* max(hablar::s(R_I.dyn$log_R), na.rm = T)) +
  labs(y = "RBC log10", x = "Asexual iRBC log10", fill = "Conversion rate") +
  theme_dark() + theme(legend.position="top") 

dual_cr.pl2 <- ggplot() +
  geom_line(data = dual_cr.df, aes(color = label_new, x = time, y = value), size = 1) +
  geom_point(data = dual_cr.df %>% filter(time%%1 == 0), aes(color = label_new, x = time, y = value, shape = label_new), size = 2) +
  labs(x = "Time (days)", y = "Conversion rate", color = "Cue", shape = "Cue") +
  xlim(0, 20) +
  scale_color_manual(values = c("#4575b4", "#91bfdb","#fc8d59","#fdcb44")) +
  theme_bw() +
  theme(legend.position="top") +
  guides(color = guide_legend(nrow = 2, byrow = TRUE))

ggarrange(dual_cr.pl2, dual_rn.pl2, align = "h", widths = c(1.25, 1))
ggsave(here("poster/dual_cue.png"), width = 7, height = 4)
```


#-------- assemble final figure -------------#
```{r}
# assemble panel B and C
dual_pl.BC <- ggarrange(dual_cr.pl, dual_rn.pl, align = "v", ncol = 1, labels = c("B", "C"))

# assemble panel A
ggarrange(dual_si_fitness.pl, dual_pl.BC, ncol = 2, labels = c("A", ""), widths = c(1, 0.75))
ggsave(here("figures/plos-bio/dual_cue.tiff"), units = "px", width = 2250, height = 1400, scale = 1.5, dpi=300,  bg = "white")
```


#============================================#
# real life disease curve
#============================================#
# execute code from report 10 to get final dataset
The following graphs will be made:
- R vs iRBC
- R log10 vs iRBC
- R vs iRBC log10
- R log10 vs iRBC log10

- G vs iRBC
- G log10 vs iRBC
- G vs iRBC log10
- G log10 vs iRBC log10

- R vs G
- R log10 vs G
- R vs G log10
- R log10 vs G log10

R on y-axis
```{r}
r_i.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = RBC, x = asex)) +
  geom_path(aes(y = RBC, x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "iRBC per µL", y = "RBC per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

rlog_i.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(RBC), x = asex)) +
  geom_path(aes(y = log10(RBC), x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "iRBC per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

r_ilog.dc <-ggplot(exp_ss.df) +
  geom_point(aes(y = RBC, x = log10(asex))) +
  geom_path(aes(y = RBC, x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(iRBC) per µL", y = "RBC per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

rlog_ilog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(RBC), x = log10(asex))) +
  geom_path(aes(y = log10(RBC), x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(iRBC) per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
```

G on y-axis
```{r}
g_i.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = gam, x = asex)) +
  geom_path(aes(y = gam, x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "iRBC per µL", y = "Gametocyte per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

glog_i.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(gam), x = asex)) +
  geom_path(aes(y = log10(gam), x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "iRBC per µL", y = "log10(Gametocyte) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

g_ilog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = gam, x = log10(asex))) +
  geom_path(aes(y = gam, x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(iRBC) per µL", y = "Gametocyte per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

glog_ilog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(gam), x = log10(asex))) +
  geom_path(aes(y = log10(gam), x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(iRBC) per µL", y = "log10(Gametocyte) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
```

R vs G
```{r}
r_g.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = RBC, x = gam)) +
  geom_path(aes(y = RBC, x = gam, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "Gametocyte per µL", y = "RBC per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

rlog_g.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(RBC), x = gam)) +
  geom_path(aes(y = log10(RBC), x = gam, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "Gametocyte per µL", y = "log10(RBC per µL)", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

r_glog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = RBC, x = log10(gam))) +
  geom_path(aes(y = RBC, x = log10(gam), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(Gametocyte) per µL", y = "RBC per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

rlog_glog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(RBC), x = log10(gam))) +
  geom_path(aes(y = log10(RBC), x = log10(gam), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(Gametocyte) per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
```

# plot together
```{r}
ggarrange(r_i.dc, rlog_i.dc, r_ilog.dc, rlog_ilog.dc,
          g_i.dc, glog_i.dc, g_ilog.dc, glog_ilog.dc,
          r_g.dc, rlog_g.dc, r_glog.dc, rlog_glog.dc, 
          ncol = 4, nrow = 3, align = "hv", common.legend = T)
ggsave(here("figures/plos-bio/exp_disease-curve.tiff"), units = "px", width = 2250, height = 1500, scale = 2, dpi=300,  bg = "white")
```

#===========================================#
# static competition
#===========================================#
#------- heat map ---------------#
# calculate fitness difference for 20 days
```{r}
# get dynamics
static.ls <- list.files(path = here("data/ci_static/"), pattern = "*.parquet", full.names = T)
static.ls <- lapply(static.ls, read_parquet)

# get fitness at day 20 (optimized for 20 days)
static_fitness.ls <- mclapply(static.ls, function(x){
  x %>% filter(time == 20 & variable %in% c("tau_cum1", "tau_cum2"))
})
static_fitness.df <- do.call(rbind, static_fitness.ls)

static_fitness.df <- tidyr::pivot_wider(static_fitness.df, names_from = "variable", id_cols = c("id_1", "id_2")) %>% 
  group_by(id_1, id_2) %>% 
  mutate(fitness_difference = tau_cum1-tau_cum2)
write.csv(static_fitness.df, here("data/ci_static.csv"))
```

# import and process data
```{r}
# import in dataset
static.df <- read.csv(here("data/ci_static.csv"))
ez_label <- read.csv(here("data/ez_label.csv"))

# join with labelling
static.df2 <- static.df %>% 
  left_join(select(ez_label, id_ci, label_ci_1 = label_ci), by = c("id_1" = "id_ci")) %>% 
  left_join(select(ez_label, id_ci, label_ci_2 = label_ci), by = c("id_2" = "id_ci")) %>% 
  select(label_ci_1, label_ci_2, fitness_difference) %>% 
  mutate(label_ci_1 = gsub("log", "log10", label_ci_1),
         label_ci_2 = gsub("log", "log10", label_ci_2))

# get reverse order, which is simply invovles switching the cues around the multiplying the fitness by negative 1
static.df3 <- static.df2
names(static.df3) <- c("label_ci_2", "label_ci_1", "fitness_difference")
static.df3$fitness_difference <- static.df2$fitness_difference * -1

# join
static.df4 <- rbind(static.df2, static.df3)

# get mean
static.mean <- static.df4 %>% 
  distinct(label_ci_1, label_ci_2, .keep_all = T) %>% 
  group_by(label_ci_1) %>% 
  summarize(mean_fitness = mean(fitness_difference, na.rm = T))
  

# join static.df4 with mean for order sorting
static.df5 <- static.df4 %>% 
  left_join(static.mean, by = "label_ci_1") %>% 
  mutate(label_ci_1 = fct_reorder(label_ci_1, mean_fitness),
         label_ci_2 = fct_relevel(label_ci_2, levels(static.df5$label_ci_1)))
```

# plot
```{r}
# heatmap
static.pl1 <- ggplot(data = static.df5, aes(x = label_ci_2, y = label_ci_1, mean_fitness, fill = fitness_difference))+
  geom_tile(color = "black") +
  scale_fill_gradient2(low = "#fc8d59", high = "#4575b4", mid = "white", 
   midpoint = 0, space = "Lab", lim = c(-0.95, 0.95), name="Fitness\ndifference") +
  theme_minimal() +  
  theme(legend.position = "left",
  axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
  panel.grid.major = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  text = element_text(size = 15),
  axis.ticks = element_blank(),
  plot.margin=margin(r = -1, l = -1, unit = "pt")) + 
  labs(x = "Strain 2 cue", y = "Strain 1 cue") +
  coord_fixed()

# mean 
static.pl2 <- ggplot() +
  geom_bar(data =  static.mean, aes(y = fct_reorder(label_ci_1, mean_fitness), x = mean_fitness), stat = "identity") +
  labs(y = "", x = "Mean fitness\ndifference") +
  theme_bw() +
  theme(plot.margin = margin(l = 0),
       panel.grid.major = element_blank(),
  panel.background = element_blank(),
  text = element_text(size = 15),
    axis.text.y=element_blank(),
    axis.ticks.y=element_blank())

ggarrange(static.pl1, static.pl2, align = "hv", widths = c(1, 0.2))
ggsave(here("figures/plos-bio/static_competition_a.tiff"), units = "px", width = 2250, height = 1500, scale = 1.2, dpi=300,  bg = "white")
```

#------ effect cue perception -------#
## logging
```{r}
# get non-logged pairings
static_nolog <- static.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "none")

static_log <- static.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "log")

static_log.df <- left_join(
  select(static_nolog, cue_1, label_ci_2, log_1, None = fitness_difference),
  select(static_log, cue_1, label_ci_2, log_1, Log = fitness_difference),
  by = c("cue_1", "label_ci_2")) %>% 
  filter(!is.na(None) & !is.na(Log)) %>% 
  mutate(classification = ifelse(Log > None, "Logged better", "Not logged better"))
```

# combined
```{r}
static_nocomb <- static.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "none")

static_comb <- static.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "comb")
  
static_comb.df <- left_join(
  select(static_nocomb, cue_1, label_ci_2, log_1, Self = fitness_difference),
  select(static_comb, cue_1, label_ci_2, log_1, Total = fitness_difference),
  by = c("cue_1", "log_1", "label_ci_2")) %>% 
  filter(!is.na(Total) & !is.na(Self)) %>% 
  mutate(classification = ifelse(Total > Self, "Total better", "Self better"))
```

# plot
```{r}
static_log.pl <- ggpaired(static_log.df, cond1 = "None", cond2 = "Log", line.color = "classification", alpha = 0.5) +
  labs(x = "Cue perception", y = "Fitness difference", color = "Effect") +
  scale_color_manual(values = c("Not logged better" = "#fc8d59", "Logged better" = "#4575b4")) +
  stat_compare_means(paired = TRUE, hjust = -0.1)

static_comb.pl <- ggpaired(static_comb.df, cond1 = "Total", cond2 = "Self", line.color = "classification", alpha = 0.5) +
  labs(x = "Cue perception", y = "Fitness difference", color = "Effect") +
  scale_color_manual(values = c("Total better" = "#fc8d59", "Self better" = "#4575b4")) +
  stat_compare_means(paired = TRUE, hjust = -0.2)

ggarrange(static_log.pl, static_comb.pl, ncol = 1, nrow = 2, align = "v")
ggsave(here("figures/plos-bio/static_competition_b.tiff"), units = "px", width = 1000, height = 2000, scale = 1.2, dpi=300,  bg = "white")
```


#===========================================#
# invasion analysis
#===========================================#
# import in data (already 20 days )
```{r}
invade.df <- read.csv(here("data/ci_invasion.csv"))
```


# process data for invasion matrix
```{r}
invade.mat <- invade.df %>% 
  group_by(V1 = pmin(mut_id, res_id), V2 = pmax(mut_id, res_id)) %>% # group by cue competition, irregardless of order
  mutate(id_alt = paste0(V1, V2),
         invade = case_when(
           fitness > 0 ~ "invade",
           fitness < 0 ~ "not invade"
         )) %>% 
  group_by(id_alt) %>% 
  mutate(
    mut_is_V1 = case_when(
    mut_id == V1 ~ "V1_invade",
    mut_id != V1 ~ "V1_invaded")) %>% 
  arrange(id_alt) %>% 
  select(fitness, V1, V2, id_alt, invade, mut_is_V1) %>% 
  tidyr::pivot_wider(names_from = mut_is_V1, values_from = fitness) %>% 
  group_by(id_alt) %>% 
  mutate(V1_invade2 = gsub("NA", "", paste0(V1_invade, collapse = "")),
         V1_invaded2 = gsub("NA", "", paste0(V1_invaded, collapse = ""))) %>% 
  distinct(id_alt, .keep_all = T) %>% 
  mutate(
    category = case_when(
    V1_invade2 > 0 & V1_invaded2 > 0 ~ "Mutual invasion",
    V1_invade2 > 0 & V1_invaded2 < 0 ~ "Only strain 1 invasion",
    V1_invade2 < 0 & V1_invaded2 > 0 ~ "Only strain 2 invasion",
    V1_invade2 < 0 & V1_invaded2 < 0 ~ "Mutual non-invasion"
  )) %>% 
  select(V1, V2, invasion = category)

invade.df %>% filter(mut_id == "G-i_none")
invade.df %>% filter(res_id == "G-i_none")
```


```{r}
# for plotting, need to get all same cue vs same cue, which we will set to NA
invade.NA <- cbind.data.frame(`V1` = unique(invade.mat$V1),
      `V2` = unique(invade.mat$V1),
      invasion = NA)

invade.mat2 <- rbind(invade.mat, invade.NA)

# get label
invade.mat3 <- invade.mat2 %>% 
  left_join(select(ez_label, id_ci, V1_label = label_ci), by = c("V1" = "id_ci")) %>% 
  left_join(select(ez_label, id_ci, V2_label = label_ci), by = c("V2" = "id_ci")) %>% 
  mutate(V1_label = gsub("log", "log10", V1_label),
                                      V2_label = gsub("log", "log10", V2_label))


invade.mat4 <- rbind(
  select(invade.mat3, V1_label, V2_label, invasion),
  select(invade.mat3, V2_label = V1_label, V1_label = V2_label) %>% mutate(invasion = NA)) %>%
  mutate(
    invasion_2 = case_when(
    invasion == "Mutual invasion" ~ "Mutual invasion",
    invasion == "Only strain 1 invasion" ~ "Competitive exclusion\nof another cue",
    invasion == "Only strain 2 invasion" ~ "Competitive exclusion\nby another cue"
  )) %>% 
  filter(!is.na(V1_label))

invade.mat4$V1_label <- factor(invade.mat4$V1_label, levels =  c("G log10", "G", "G1+G2", "I log10", "I", "I+Ig log10", "I+Ig", "I1+I2 log10", "I1+I2", "Ig log10", "Ig", "Ig1+Ig2", "R log10", "R", "sum log10", "sum"))
invade.mat4$V2_label <- factor(invade.mat4$V2_label, levels =  c("G", "G1+G2", "I log10", "I", "I+Ig log10", "I+Ig", "I1+I2 log10", "I1+I2", "Ig log10", "Ig", "Ig1+Ig2", "R log10", "R", "sum log10", "sum", "G log10"))
```


# plot invasion matrix
```{r}
invasion.pl1 <- ggplot(data = invade.mat4, aes(x = V2_label, y = V1_label, fill = invasion_2)) +
  geom_tile(color = "black") +
  theme_minimal() +  
  theme(legend.position = "right",
  axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
  panel.grid.major = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  text = element_text(size = 15),
  axis.ticks = element_blank(),
  plot.margin=margin(r = 0)) + 
  labs(fill = "Invasibility", x = "Competing cue", y = "Reference cue") +
  scale_x_discrete(limits = rev) +
  scale_y_discrete(limits = rev) +
  scale_fill_manual(values = c("Competitive exclusion\nof another cue" = "#4575b4",
                               "Competitive exclusion\nby another cue" = "#fc8d59",
                               "Mutual invasion" = "#fee090"),
                    na.value = "white") +
  theme(legend.position = "none")
```

# create summary bar chart
```{r}
# create a stacked barchart for summary
## filter out na
invade.matalt <- invade.mat3 %>% na.exclude()

# get frquency from both sides. Note when grouping for V2, from the perspective of cue 2, scenarrio when strain 2 invade = strain 1 invade
invade.matalt1 <- invade.matalt %>% group_by(V1_label, invasion) %>% 
  summarize(frequency_1 = n())

invade.matalt2 <- invade.matalt %>%
  mutate(invasion_alt = case_when(
    invasion == "Only strain 1 invasion" ~ "Only strain 2 invasion",
    invasion == "Only strain 2 invasion" ~ "Only strain 1 invasion",
    invasion == "Mutual invasion" ~ "Mutual invasion",
    invasion == "Mutual non-invasion" ~ "Mutual non-invasion"
  )) %>% 
  group_by(V2_label, invasion_alt) %>% 
  summarize(frequency_2 = n())     

# full join and sum. has confirmed all of them add up to 14 
invade.matalt3 <- full_join(invade.matalt1, invade.matalt2, by = c("V1_label" = "V2_label", "invasion" = "invasion_alt"))

invade.matalt3[is.na(invade.matalt3)] <- 0
invade.matalt4 <- invade.matalt3 %>% 
  mutate(freq = frequency_1 + frequency_2) %>% 
  mutate(temp = case_when(
    invasion == "Only strain 1 invasion" ~ freq
  )) %>% 
  group_by(V1_label) %>% 
  mutate(invade_1_freq = max(temp, na.rm = T)) %>% 
  mutate(invasion_2 = case_when(
    invasion == "Mutual invasion" ~ "Mutual invasion",
    invasion == "Only strain 1 invasion" ~ "Competitive exclusion\nof another cue",
    invasion == "Only strain 2 invasion" ~ "Competitive exclusion\nby another cue"
  ))
```



```{r}
invasion.pl2 <- ggplot() +
  geom_bar(data = invade.matalt4, aes(x = freq, y = reorder(V1_label, invade_1_freq), fill = forcats::fct_rev(factor(invasion_2, levels = c("Competitive exclusion\nof another cue", "Competitive exclusion\nby another cue", "Mutual invasion", "Mutual non-invasion")))), stat = "identity") +
  labs(x = "Frequency", fill = "Invasibility", y = "Cue") +
  theme_bw()  +
  scale_fill_manual(values = c("Competitive exclusion\nof another cue" = "#4575b4",
                               "Competitive exclusion\nby another cue" = "#fc8d59",
                               "Mutual invasion" = "#fee090")) +
  theme(text = element_text(size = 15))
```

# plot together
```{r}
ggarrange(invasion.pl1, invasion.pl2, align = "h", common.legend = T, widths = c(2, 1))
ggsave(here("figures/plos-bio/invasion_a.tiff"), units = "px", width = 2250, height = 1100, scale = 1.4, dpi=300,  bg = "white")
```

#---------------- invasion pairwise comparison-----------------#
## proces data
```{r}
# join invade df with label because I am lazy
invade.df2 <- invade.df %>% 
  left_join(select(ez_label, id_ci, label_ci_1 = label_ci), by = c("mut_id" = "id_ci"))

invade.df2 
```

# log
```{r}
# get non-logged pairings
invade_nolog <- invade.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "none")


invade_log <- invade.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "log")

invade_log.df <- left_join(
  select(invade_nolog, cue_1, res_id, log_1, None = fitness),
  select(invade_log, cue_1, res_id, log_1, Log = fitness),
  by = c("cue_1", "res_id")) %>% 
  filter(!is.na(None) & !is.na(Log)) %>% 
  mutate(classification = ifelse(Log > None, "Logged better", "Not logged better"))

invade_log
```

# combined
```{r}
invade_nocomb <- invade.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "none")

invade_comb <- invade.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "comb")
  
invade_comb.df <- left_join(
  select(invade_nocomb, cue_1, res_id, log_1, Self = fitness),
  select(invade_comb, cue_1, res_id, log_1, Total = fitness),
  by = c("cue_1", "log_1", "res_id")) %>% 
  filter(!is.na(Total) & !is.na(Self)) %>% 
  mutate(classification = ifelse(Total > Self, "Total better", "Self better"))
invade_comb.df
```

# plot
```{r}
invade_log.pl <- ggpaired(invade_log.df, cond1 = "None", cond2 = "Log", line.color = "classification", alpha = 0.5) +
  labs(x = "Cue perception", y = "Fitness difference", color = "Effect") +
  scale_color_manual(values = c("Not logged better" = "#fc8d59", "Logged better" = "#4575b4"))+
  stat_compare_means(paired = TRUE, hjust = -0)

invade_comb.pl <- ggpaired(invade_comb.df, cond1 = "Total", cond2 = "Self", line.color = "classification", alpha = 0.5) +
  labs(x = "Cue perception", y = "Fitness difference", color = "Effect") +
  scale_color_manual(values = c("Total better" = "#fc8d59", "Self better" = "#4575b4"))+
  stat_compare_means(paired = TRUE, hjust = -0)

ggarrange(invade_log.pl, invade_comb.pl, align = "h", ncol = 2)
ggsave(here("figures/plos-bio/invasion_b.tiff"), units = "px", width = 2250, height = 900, scale = 1.2, dpi=300,  bg = "white")
```


#===========================================#
# Cue performance across single, co-infection, static, and invasion
#===========================================#
```{r}
# import in all the ranks
si_opt.df <- read.csv(here("data/si_opt.csv"))
ci_opt.df <- read.csv(here("data/ci_opt.csv"))
static.df <- read.csv(here("data/ci_static.csv"))
invasion.df <- read.csv(here("data/ci_invasion.csv"))

# calculate mean fitness
static.mean ## already calculated
invasion.mean <- rbind(select(invasion.df, cue = mut_id, fitness),
      select(invasion.df %>% mutate(fitness2 = fitness * -1), cue = res_id, fitness = fitness2)) %>% 
  group_by(cue) %>% 
  summarize(mean_fitness = mean(fitness)) %>% 
  arrange(desc(mean_fitness))

# calculate ranks
si.rank <- si_opt.df %>% 
  left_join(select(ez_label, id_ci, id_si, label = ez_label_si), by = c("id" = "id_si")) %>% 
  mutate(rank = dense_rank(-fitness_20),
         model = "Single infection") %>% 
  select(rank, model, id = id_ci, label)

ci.rank <- ci_opt.df %>% 
  left_join(select(ci_fitness.df, value, label), by = "label") %>% 
  left_join(select(ez_label, id_ci, id_si, ez_label), by = c("id" = "id_ci")) %>% 
  mutate(rank = rank(-value),
         model = "Co-infection") %>% 
  select(rank, model, id, label = ez_label)

static.rank <- static.mean %>% 
  mutate(label_ci = gsub("log10", "log", label_ci_1)) %>% 
  left_join(select(ez_label, id_ci, label_ci, ez_label), by = c("label_ci" = "label_ci")) %>% 
  mutate(rank = rank(-mean_fitness),
         model = "Static mixed genotype") %>% 
  select(rank, model, id = id_ci, label = ez_label)

invasion.rank <- invasion.mean %>% 
  left_join(select(ez_label, id_ci, ez_label), by = c("cue" = "id_ci")) %>% 
  mutate(rank = rank(-mean_fitness),
         model = "Invasive mixed genotype") %>% 
  select(rank, model, id = cue, label = ez_label) %>% 
  mutate(label = gsub("\n", " ", paste0("   ", label)))

static.rank
# concatenate
fitness.rank <- rbind(
  si.rank, ci.rank, static.rank, invasion.rank
) %>% 
  mutate(model = fct_relevel(model, c("Single infection", "Co-infection", "Static mixed genotype", "Invasive mixed genotype")))

# highlight the good ones
fitness.rank_good <- fitness.rank %>% 
  filter(id %in% c("Ig-i_log", "I-i+Ig-i_log", "sum_log", "G-i_log"))
```

# plot
```{r}
library(ggbump)

ggplot() +
  geom_bump(data = fitness.rank, aes(x = model, y = rank, group = id), color = "grey") +
  geom_point(data = fitness.rank, aes(x = model, y = rank, group = id), color = "grey", size = 3) +
  geom_bump(data = fitness.rank_good, aes(x = model, y = rank, group = id, color = id)) +
  geom_point(data = fitness.rank_good, aes(x = model, y = rank, group = id, color = id), size = 3) +
  geom_text(data = invasion.rank, aes(x = model, y = rank, label = label), size = 3.5, hjust = 0, inherit.aes = F) +
  scale_color_viridis_d() +
  theme_minimal() +
  coord_cartesian(clip = "off") +
  scale_y_reverse() +
  expand_limits(x = 5.5) +
  theme(legend.position = "none") +
  labs(x = "Model", y = "Mean fitness rank")
ggsave(here("figures/plos-bio/fitness_rank.tiff"), units = "px", width = 2250, height = 1500, scale = 1, dpi=300,  bg = "white")
```


#-=====================#
# Partitioning best cue
#=====================-#
#------- single infection -----------#
# redo some optimization (lower fitness in no R than default)
```{r}
source(here("functions/chabaudi_si_clean_R.R"))
source(here("functions/chabaudi_si_clean_N.R"))
# I none
cl <- makeCluster(detectCores()); setDefaultCluster(cl = cl)
I_no_R <- optimParallel(
    par = rep(0.5,4), # start at 0.5x4
    fn = chabaudi_si_clean_R, 
    control = list(trace = 6, fnscale = -1),
    immunity = "tsukushi",
    parameters = parameters_tsukushi,
    time_range = seq(0, 20, by = 1e-3),
    cue_range =  seq(0, 6*(10^6), by = (6*(10^6))/5000),
    cue = "I",
    log_cue = "none",
    solver = "vode")
stopCluster(cl)
# 0.144021 -43.1046 2030.27 -524.686 
# 8.69589
```

# import and process data
```{r}
# import in data
si_partition.ls <- list.files(path = here("data/partition/si/"), pattern = "*.csv", full.names = T)
si_partition.ls <- lapply(si_partition.ls, read.csv)
si_partition.df <- do.call(rbind, si_partition.ls)

# combine with si fitness (default)
si_partition.df <- si_partition.df %>% left_join(select(si_fitness.df, id, fitness = value), by = "id")

# make longer
si_partition.df2 <- tidyr::pivot_longer(si_partition.df, c(fitness_R, fitness_N, fitness_W, fitness))

# calculate coefficient of variation. Also rename
si_partition.df2 <- si_partition.df2 %>% 
  group_by(name) %>% 
  mutate(cv = sd(value)/mean(value)*100,
         mean = mean(value),
         category = case_when(
           name == "fitness_R" ~ "No RBC limitation",
           name == "fitness_W" ~ "No targeted immunity",
           name == "fitness_N" ~ "No indiscriminate\nimmunity",
           name == "fitness" ~ "Default"
         ))

```

# plot
```{r}
library(ungeviz)
# raw fitness
si_partition.pl1 <- ggplot() +
  geom_vpline(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = mean, group = category, color = category), show.legend = F, size = 1) +
  geom_point(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = value), size = 2, alpha = 0.7) +
  geom_line(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = value, group = id), alpha = 0.2) +
  labs(x = "Fitness", y = "Conditions") +
  theme_bw()

# coefficient of variation
si_partition.pl2 <- ggplot() +
  geom_bar(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = cv), stat = "identity") +
  labs(x = "Coefficient of\nvariation (%)", y = "") +
  theme_bw() +
  theme(axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())

si_partition.pl <- ggarrange(si_partition.pl1, si_partition.pl2, widths = c(1, 0.3), align = "h")
si_partition.pl

ggsave(here("figures/plos-bio/partition_fitness.tiff"), width = 7, height = 4)
```

#------- consequences of no targeted immunity ------------#
# get dynamics of no targeted immunity
```{r}
get_dyn <- function(df){
  
  source(here("functions/chabaudi_si_clean_W.R"))
  id <- df$id
  cue <- df$cue
  log <- df$log
  par <- c(df$var_W1, df$var_W2, df$var_W3, df$var_W4)
  cue_range <- seq(df$low, df$high, by = df$by)
  
  # get dynamics
  dyn <- chabaudi_si_clean_W(
    parameters_cr = par,
    immunity = "tsukushi",
    parameters = parameters_tsukushi,
    time_range = seq(0, 20, by = 1e-3),
    cue_range =  cue_range,
    cue = cue,
    log_cue = log,
    solver = "vode",
    dyn = T
  )
  
  # combine
  dyn2 <- cbind(dyn, id = id, cue = cue, log = log)
  
  write_parquet(dyn2, paste0(here("data/partition/si_dyn/"), id, "_noW_dyn.parquet"))
  
}
```

# get df to run
```{r}
# join with cue_range
cue_range_si.df <- read.csv(here("data/cue_range_si.csv"))
si_partition.df3 <- si_partition.df %>% left_join(select(cue_range_si.df, low, high, by, id), "id")

# lapply loop
si_partition.ls <- split(si_partition.df3, seq(nrow(si_partition.df3)))
mclapply(si_partition.ls, get_dyn)
```

# process dataframe
```{r}
# import in dataframe
no_W.ls <- list.files(here("data/partition/si_dyn/"), pattern = "*noW_dyn.parquet", full.names = T)
no_W.df <- lapply(no_W.ls, read_parquet)
no_W.df <- do.call(rbind, no_W.df)

# combine with ez label
ez_label <- read.csv(here("data/ez_label.csv"))
no_W.df <- left_join(no_W.df, ez_label, by = c("id" = "id_si"))

# get conversion rate 
no_W.cr <- no_W.df %>% filter(variable == "cr")
no_W.I <- no_W.df %>% filter(variable == "I")

# get default conversion rate dynamics
si_dyn.df <- left_join(si_dyn.df, ez_label, by = c("id" = "id_si"))
si_dyn.cr <- si_dyn.df %>% filter(variable == "cr")
si_dyn.I <- si_dyn.df %>% filter(variable == "I")
```

# plot conversion rate
mechanism: targeted immunity led to lower parasite density in the initial stages, which prevents parasites from making the switch from no conversion rate to high conversion rate. When parsite density undergoes drastic increase at the beginning due to lower immunity, this presents a higher degree of signal that allows parasite to make the switch appropriately
```{r}
partition_cr.pl <- ggplot() +
  geom_line(data = no_W.cr, aes(x = time, y= value, color = "No targeted immunity")) +
  geom_line(data = si_dyn.cr, aes(x = time, y= value, color = "Default")) +
  facet_wrap(~ez_label_si, ncol = 5) +
  xlim(0, 20) +
  geom_vline(xintercept = 7) +
  labs(x = "Time (days)", y = "Conversion rate", color = "Condition") +
  theme_bw()

no_W.cr
```

#----- cue state --------------#

# function to get cue states
```{r}
# function to get cue states
get_cue_state <- function(df){
  cue <- trimws(gsub("_log|_none", "", unique(df$id)))
  if(cue != "I+Ig"){
  df2 <- df %>% filter(variable == cue)
  if(str_detect(unique(df$id), "log")){
    df2 <- df2 %>% 
      mutate(value = log10(value))
  }
  }
  
  if(cue == "I+Ig"){
    df2 <- df %>% filter(variable %in% c("I", "Ig")) %>% 
      group_by(time) %>% 
      mutate(value = sum(value))
    
    if(str_detect(unique(df$id), "log")){
    df2 <- df2 %>% 
      mutate(value = log10(value))
  }
  }
  
  df2$value[df2$value == -Inf] <- 0
  
  write_parquet(df2, paste0(here("data/partition/si_default_state/"), unique(df$id), "_noW_state.parquet"))
}
```

# run function
```{r}
# split dynamics based on id
no_W.split <- split(no_W.df, no_W.df$id)

# run function
mclapply(no_W.split, get_cue_state)

# get dataframe
no_W.state <- list.files(here("data/partition/si_state/"), pattern = "*.parquet", full.names = T)
no_W.state <- lapply(no_W.state, read_parquet)
no_W.state <- do.call(rbind, no_W.state)
no_W.state$value[no_W.state$value < 0] <- 0

# get same for si infection
default.split <- split(si_dyn.df, si_dyn.df$id)
mclapply(default.split, get_cue_state)
default.state <- list.files(here("data/partition/si_default_state/"), pattern = "*.parquet", full.names = T)
default.state <- lapply(default.state, read_parquet)
default.state <- do.call(rbind, default.state)
default.state$value[default.state$value < 0] <- 0

# manually correct non-logging
I_Ig.corr <- no_W.state %>% filter(id == "I+Ig_log") %>% 
  mutate(value = log10(value))
I_Ig.corr$value[I_Ig.corr$value < 0] <- 0

no_W.state2 <- no_W.state %>% filter(id != "I+Ig_log")
no_W.state2 <- no_W.state2 %>% rbind(no_W.state2, I_Ig.corr)
```

# plot
absence of targeted immunity led to drastic increase in parasite density in early phases of infection. This produces high signal intensity for parasite and host-based cues, especially non-logged ones, which allows for state differentation. While this can be viewed as a modelling artifiact, it should be noted that the logged cues seldom changed as these changes in early infection did little to alter the actual early signal intensity sensed by the parasite. In an environment where there is heterogeneity in host response, and thus, signal, logging allows for parasites to adapt optimal strategy whereas non-logged cues must contend with sensitivity to immunity.
```{r}
# function to individually plot stuff
plot_state <- function(df1, df2){
  
  # plot state dynamics
  state_pl <- ggplot() +
  geom_line(data = df1, aes(x = time, y = value, color = name, group = name)) +
  facet_wrap(~ez_label_si, scales = "free") +
  xlim(1,20) +
  theme_bw() +
  theme(legend.position="none") +
  labs(x = "", y = "Cue") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE)) +
  scale_color_manual(values =c("Default" = "#4575b4", "No targeted\nimmunity" = "#fc8d59"))
  
  # plot conversion rate dynamics
  cr_pl <- ggplot() +
  geom_raster(data = df2, aes(x = time, y = name, fill = value)) +
  xlim(1,20) +
  theme_bw() +
    labs(x = "Time (days)") +
  theme(axis.title.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "none") +
    scale_fill_viridis_c(lim = c(0, 1))
  
  # arrange
  ggarrange(state_pl, cr_pl, ncol = 1, nrow = 2, align = "v", heights = c(1, 0.4))
  ggsave(paste0(here("figures/plos-bio/partition/"), unique(df1$id), ".tiff"), width = 4.5, height = 3.5)
}
```

# split
```{r}
# combine state
noW_default.state <- left_join(
  select(no_W.state2, time, `No targeted\nimmunity` = value, id, ez_label_si), 
  select(default.state %>% filter(time <= 20), time, `Default` = value, id, ez_label_si), by = c("time", "id", "ez_label_si"))

noW_default.state2 <- tidyr::pivot_longer(noW_default.state, c(`No targeted\nimmunity`, `Default`))
# combine conversion raster
noW_default.cr <- left_join(
  select(no_W.cr, time, `No targeted\nimmunity` = value, id, ez_label_si), 
  select(si_dyn.cr %>% filter(time <= 20), time, `Default` = value, id, ez_label_si), by = c("time", "id", "ez_label_si"))
noW_default.cr2 <- tidyr::pivot_longer(noW_default.cr, c(`No targeted\nimmunity`, `Default`))

# split
noW_default_state.ls <- split(noW_default.state2, noW_default.state2$id)
noW_default_cr.ls <- split(noW_default.cr2, noW_default.cr2$id)

# run function
mapply(plot_state, noW_default_state.ls, noW_default_cr.ls)
```

#-------- reaction norms of default vs optimized ------------#
# get reaction norm and rug data
```{r}
source(here("functions/par_to_df.R"))

# Gametocyte
g_log.rn <- par_to_df(par = c(1.211521,	-3.936778,	-1.312944,	-1.285713), cue_range = seq(0, log10(6*(10^4)), by = (log10(6*(10^4)))/5000))
g_log.rn2 <- par_to_df(par = c(1.393860539,	-4.253007616,	-0.313947029,	-2.000857344), cue_range = seq(0, log10(6*(10^4)), by = (log10(6*(10^4)))/5000))

g.rn <- par_to_df(par = c(0.04061288,	-9.31445958,	74.13015506,	-431.5984364), cue_range = seq(0, 6*(10^4), by = (6*(10^4))/5000))
g.rn2 <- par_to_df(par = c(0.541729073,	-3.904616443,	0.87487412,	-0.694177021), cue_range = seq(0, 6*(10^4), by = (6*(10^4))/5000))

# I+Ig
I_Ig_log.rn <- par_to_df(par = c(3.594042,	4.157744,	-13.530672,	2.599905), cue_range = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000))
I_Ig_log.rn2 <- par_to_df(par = c(63.71893822,	-87.77671601,	-56.55475514,	-66.02209549), cue_range = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000))

I_Ig.rn <- par_to_df(par = c(0.3159297,	-46.1104558,	1250.752908,	-6.1982093), cue_range = seq(0, 6*(10^6), by = (6*(10^6))/5000))
I_Ig.rn2 <- par_to_df(par = c(0.731982784,	-21.69799449,	149.7841876,	17.02551769), cue_range = seq(0, 6*(10^6), by = (6*(10^6))/5000))

# convert log to non-logged scale
g_log.rn$cue_range <- 10^(g_log.rn$cue_range)
g_log.rn2$cue_range <- 10^(g_log.rn2$cue_range)
I_Ig_log.rn$cue_range <- 10^(I_Ig_log.rn$cue_range)
I_Ig_log.rn2$cue_range <- 10^(I_Ig_log.rn2$cue_range)

# get rug
g_log.rug <- default.state %>% 
  filter(label_si == "G log") %>% 
  mutate(value = 10^value) %>% 
  select(label_si, value)

g_log.rug2 <- no_W.state %>% 
  filter(label_si == "G log") %>% 
  mutate(value = 10^value) %>% 
  filter(value <= 6*(10^4)) %>% 
  select(label_si, value)

I_Ig_log.rug <- default.state %>% 
  filter(label_si == "I+Ig log") %>% 
  select(label_si, value)

I_Ig_log.rug2 <- no_W.state %>% 
  filter(label_si == "I+Ig log") %>% 
  select(label_si, value)

g.rug <- default.state %>% 
  filter(label_si == "G") %>% 
  select(label_si, value)

g.rug2 <- no_W.state %>% 
  filter(label_si == "G" & value <= 6*(10^4)) %>% 
  select(label_si, value)

I_Ig.rug <- default.state %>% 
  filter(label_si == "I+Ig") %>% 
  select(label_si, value)

I_Ig.rug2 <- no_W.state %>% 
  filter(label_si == "I+Ig") %>% 
  select(label_si, value)

# get rug limits
rug_lim <- rbind(g_log.rug,
                 g_log.rug2,
                 I_Ig_log.rug,
                 I_Ig_log.rug2,
                 g.rug,
                 g.rug2,
                 I_Ig.rug,
                 I_Ig.rug2) %>% 
  group_by(label_si) %>% 
  summarize(max = max(hablar::s(value), na.rm = T),
            min = min(hablar::s(value), na.rm = T))

# combine and filter
rn <- rbind(
  cbind(g_log.rn, label_si = "G log", condition = "Default"),
  cbind(g_log.rn2, label_si = "G log", condition = "No targeted\nimmunity"),
  cbind(g.rn, label_si = "G", condition = "Default"),
  cbind(g.rn2, label_si = "G", condition = "No targeted\nimmunity"),
  cbind(I_Ig_log.rn, label_si = "I+Ig log", condition = "Default"),
  cbind(I_Ig_log.rn2, label_si = "I+Ig log", condition = "No targeted\nimmunity"),
  cbind(I_Ig.rn, label_si = "I+Ig", condition = "Default"),
  cbind(I_Ig.rn2, label_si = "I+Ig", condition = "No targeted\nimmunity")
) %>% 
  left_join(rug_lim, by = "label_si") %>% 
  group_by(label_si) %>% 
  filter(cue_range <= max & cue_range >= min)

# combine rug
rug <- rbind(cbind(g_log.rug, condition = "Default"),
             cbind(g_log.rug2, condition = "No targeted\nimmunity"),
             cbind(g.rug, condition = "Default"),
             cbind(g.rug2, condition = "No targeted\nimmunity"),
             cbind(I_Ig_log.rug, condition = "Default"),
             cbind(I_Ig_log.rug2, condition = "No targeted\nimmunity"),
             cbind(I_Ig.rug, condition = "Default"),
             cbind(I_Ig.rug2, condition = "No targeted\nimmunity"))

# cobine with ezlabel
rn2 <- rn %>% left_join(ez_label, by = "label_si")
rug2 <- rug %>% left_join(ez_label, by = "label_si")

# filter rug
default.rug <- rug2 %>% filter(condition == "Default")
no.rug <- rug2 %>% filter(condition == "No targeted\nimmunity")
```

# plot
```{r}
ggplot() +
  geom_line(data = rn2, aes(x = cue_range, y = cr, color = condition)) +
  geom_rug(data = default.rug, aes(x = value, color = condition), sides = "b") +
  geom_rug(data = no.rug, aes(x = value, color = condition), sides = "t") +
  facet_wrap(~fct_relevel(ez_label_si, c("Gametocyte log10", "Gametocyte", "Asexual&sexual\niRBC log10", "Asexual&sexual iRBC")), scales = "free_x") +
  scale_x_continuous(labels = function(x) format(x, scientific = TRUE)) +
  theme_bw() +
  scale_color_manual(values =c("Default" = "#4575b4", "No targeted\nimmunity" = "#fc8d59")) +
  ylim(0, 1.1) +
  labs(x = "Cue range", y = "Conversion rate", color = "Condition")

ggsave(here("figures/plos-bio/partition_rn.tiff"), width = 7.5, height = 6)
```

# get conversion rate legend
```{r}
noW_default.cr %>% filter(id == "G_log") %>% 
ggplot() +
  geom_raster(aes(x = time, y = id, fill = Default)) +
  xlim(1,20) +
  theme_bw() +
    labs(x = "Time (days)",
         fill = "Conversion rate") +
  theme(axis.title.y=element_blank(),
        axis.ticks.y=element_blank(),) +
    scale_fill_viridis_c(lim = c(0, 1))

ggsave(here("figures/plos-bio/cr_legend.tiff"))
```

#================================#
# Disease curves for single, co-infection, and invasion
#===============================#
# get data for disease curves
```{r}
# single infection dynamics
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet")) 

# co-infection dynamics (mon-cue)
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))

# dual cue dynamics
dual_dyn.df <- read_parquet(here("data/dual_cue_dyn/dual_cue_dyn.parquet"))
```

#------- single cue comparison ---------------#
# process data
```{r}
# get classification
si_cue.dv <- si_fitness.df %>% 
  mutate(classification = case_when(
    value > 9.2 ~ "High-performing",
    value <= 9.2 ~ "Poor-performing"
  ))

# process dynamics -> turn skinny
si_dc.df <- si_dyn.df %>% 
  filter(variable == "I" | variable == "Ig" | variable == "R") %>% 
  tidyr::pivot_wider(names_from = variable, values_from = value) %>% 
  mutate(total = I+Ig)

# join with classificaiton
si_dc.df2 <- si_dc.df %>% left_join(select(si_cue.dv, id, classification), by = "id")
si_cue.dv
# split into top erforming and poor-performing cues
si_dc.high <- si_dc.df2 %>% filter(classification == "High-performing")
si_dc.poor <- si_dc.df2 %>% filter(classification == "Poor-performing")

# join high performing with label
si_dc.high <- si_dc.high %>% left_join(ez_label %>% distinct(label_si, .keep_all = T), by = c("id" = "id_si"))

#write_parquet(si_dc.high, here("data/disease_curve/si_dc_high.parquet"))
#write_parquet(si_dc.poor, here("data/disease_curve/si_dc_poor.parquet"))
```

# plot
```{r}
si_dc.poor <- read_parquet(here("data/disease_curve/si_dc_poor.parquet"))
si_dc.high <- read_parquet(here("data/disease_curve/si_dc_high.parquet"))

# plot
si_dc.pre <- ggplot() +
  geom_path(data = si_dc.poor, aes(x= total, y = R, group = id), color = "dark grey", arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "Single infection\ngood performing cues", x = "Asexual & sexual iRBC per µL", y = "RBC per µL") +
  theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
  guides(shape = FALSE)

si_dc.pl <- si_dc.pre +
  geom_point(data = si_dc.high %>% filter(row_number() %% 1000 ==0), aes(x = total, y = R, color = ez_label, shape = ez_label), size = 3) +
  geom_path(data = si_dc.high, aes(x= total, y = R, group = ez_label, color = ez_label), size = 1, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  scale_color_manual(values=c( "#4575b4", "#fc8d59", "#fdcb44", "#91bfdb"))  +
  theme(legend.position = "top") +
  guides(color=guide_legend(nrow=2,byrow=TRUE))
```

#---------- co-infection monocue -------------#
```{r}
# get relevent variables
ci_dc.df <- ci_dyn.df %>% 
  filter(variable == "I1" | variable == "Ig1" | variable == "R")

# morph into skinny format
ci_dc.df <- tidyr::pivot_wider(ci_dc.df, names_from = variable, values_from = value, id_cols = c(time, label)) %>% 
  mutate(total = I1+Ig1)

# good cue bad cue
ci_cue.dv <- ci_fitness.df %>% 
  mutate(classification = case_when(
    value > 2.25 ~ "High-performing",
    value <= 2.25 ~ "Poor-performing"
  ))

# join with classificaiton
ci_dc.df2 <- ci_dc.df %>% left_join(ci_cue.dv, by = "label")

# split into top erforming and poor-performing cues
ci_dc.high <- ci_dc.df2 %>% filter(classification == "High-performing")
ci_dc.poor <- ci_dc.df2 %>% filter(classification == "Poor-performing")

# join high performing with label
ci_dc.high2 <- ci_dc.high %>% left_join(ez_label, by = c("label" = "label_ci"))

#write_parquet(ci_dc.high2, here("data/disease_curve/ci_dc_high.parquet"))
#write_parquet(ci_dc.poor, here("data/disease_curve/ci_dc_poor.parquet"))
```

# plot
```{r}
ci_dc.poor <- read_parquet(here("data/disease_curve/ci_dc_poor.parquet"))
ci_dc.high2 <- read_parquet(here("data/disease_curve/ci_dc_high.parquet"))

# plot
ci_dc.pre <- ggplot() +
  geom_path(data = ci_dc.poor, aes(x= total, y = R, group = label), color = "dark grey", arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "Co-infection\ngood performing cues", x = "Asexual & sexual iRBC per µL", y = "RBC per µL") +
  theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
  guides(shape = FALSE)

ci_dc.pl <- ci_dc.pre +
  geom_point(data = ci_dc.high2 %>% filter(row_number() %% 1000 ==0), aes(x = total, y = R, color = ez_label, shape = ez_label), size = 3) +
  geom_path(data = ci_dc.high2, aes(x= total, y = R, group = ez_label, color = ez_label), size = 1, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  scale_color_manual(values=c( "#4575b4", "#fc8d59", "#fdcb44", "#91bfdb")) +
  theme(legend.position = "top") +
  guides(color=guide_legend(nrow=2,byrow=TRUE))

```

#--------- dual cue --------------------------#
# process data
```{r}
# turn skinny
dual_dc.df <- dual_dyn.df %>% 
  mutate(label_alt = paste(label, "+" , label_b)) %>% 
  select(label_alt, time, variable, value) %>% 
  filter(variable == "I" | variable == "Ig" | variable == "R") %>% 
  distinct(label_alt, time, variable, .keep_all = T)

dual_dyn.df

dual_dc.df2 <- dual_dc.df %>% 
  tidyr::pivot_wider(names_from = variable, values_from = value, id_cols = c(time, label_alt)) %>%
  mutate(total = I+Ig)

write_parquet(dual_dc.df2, here("data/disease_curve/dual_dc.parquet"))

# good dual cue -> list of good performing dual cues that encompass wide variety of cues
selected_dual_cue <- c("R log + I log", "R + Ig log", "G log + I log", "G log + Ig log", "Ig + I log")
bad_dual_cue <- c("G + I", "R + Ig", "R log + Ig", "G + R", "G + R log", "G + Ig", "Ig + I", "R + I", "R log + I")

# get classification -> R log10 + I log10 as the only good one
dual_dc.high <- dual_dc.df2 %>% filter(label_alt %in% selected_dual_cue) %>% 
  mutate(label_alt = gsub("log", "log10", label_alt))
dual_dc.poor <- dual_dc.df2 %>% filter(label_alt %in% bad_dual_cue) %>% 
  mutate(label_alt = gsub("log", "log10", label_alt))
#write_parquet(dual_dc.high, here("data/disease_curve/dual_dc_high.parquet"))
#write_parquet(dual_dc.poor, here("data/disease_curve/dual_dc_poor.parquet"))

```

# plot
```{r}
dual_dc.high <- read_parquet(here("data/disease_curve/dual_dc_high.parquet"))
dual_dc.poor <- read_parquet(here("data/disease_curve/dual_dc_poor.parquet"))

dual_dc.high2 <- dual_dc.high %>% 
  filter(label_alt == "R log10 + I log10")

# add 
dual_dc.pre <- ggplot() +
  geom_path(data = dual_dc.poor, aes(x= total, y = R, group = label_alt), color = "dark grey", arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "High-performing\ndual cues per µL", x = "Asexual & sexual iRBC", y = "RBC per µL") +
  theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
  guides(shape = FALSE)


dual_dc.pl <- dual_dc.pre +
  geom_point(data = dual_dc.high2 %>% filter(row_number() %% 1000 ==0), aes(x = total, y = R, shape = label_alt), color = "#4575b4", size = 3) +
  geom_path(data = dual_dc.high2, aes(x= total, y = R, group = label_alt), color = "#4575b4", size = 1, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme(legend.position = "none") +
  guides(color = guide_legend(override.aes = list(size = 0.1)))
```

#--------- co-infection static ----------------#
```{r}
# import in dynamics data
static_dyn.ls <- list.files(path = here("data/ci_static/"), pattern = "*.parquet", full.names = T)
static_dyn.ls <- lapply(static_dyn.ls, read_parquet)

# filter variable and transform
static_dyn.ls2 <- mclapply(static_dyn.ls, function(x){
  x %>% 
    filter(variable == "I1" | variable == "Ig1" | variable == "I2" | variable == "Ig2" | variable == "R") %>% 
    mutate(id_alt = paste(id_1, id_2)) %>% 
    tidyr::pivot_wider(names_from = variable, values_from = value, id_cols = c(time, id_alt)) %>%
  mutate(total1 = I1+Ig1, total2 = I2+Ig2)
})

static_dc.df <- do.call(rbind, static_dyn.ls2)
static_dc.df <- static_dc.df %>% 
  mutate(id_1 = gsub(" .*", "", id_alt),
         id_2 = gsub(".* ", "", id_alt)) %>% 
  filter(id_1 != id_2)
#write_parquet(static_dc.df, here("data/disease_curve/static_dc.parquet"))
```

# further processing
```{r}
static_dc.df <- read_parquet(here("data/disease_curve/static_dc.parquet"))
# get winners and losers
## import in fitness
static_fitness.df <- read.csv(here("data/ci_static.csv"))
## get winner situation
static_fitness.df2 <- static_fitness.df %>% 
  filter(id_1 != id_2) %>% 
  mutate(winning_id = case_when(
    fitness_difference > 0 ~ id_1,
    fitness_difference< 0 ~ id_2
  ),
  losing_id = case_when(
    fitness_difference < 0 ~ id_1,
    fitness_difference> 0 ~ id_2
  ))

# left join
static_dc.df2 <- static_dc.df %>% 
  left_join(select(static_fitness.df2, id_1, id_2, winning_id, losing_id, fitness_difference), by = c("id_1", "id_2"))

# get winner-loser difference in terms of I+Ig also filter out to onyl very strong fitness difference
static_dc.df3 <- static_dc.df2 %>% 
  mutate(total_diff = case_when(
    fitness_difference > 0 ~ total1-total2,
    fitness_difference< 0 ~ total2-total2
  ),
  total_winner = case_when(
    fitness_difference > 0 ~ total1,
    fitness_difference< 0 ~ total2
  ),
  total_loser = case_when(
    fitness_difference > 0 ~ total2,
    fitness_difference< 0 ~ total1
  )) %>% 
  filter(abs(fitness_difference) > 0.5)
```

# plot
```{r}
static_dc.pl <- ggplot() +
  geom_path(data = static_dc.df3, aes(x= total_winner, y = R, group = id_alt, color = "Winner"), alpha = 0.5, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  geom_path(data = static_dc.df3, aes(x= total_loser, y = R, group = id_alt, color = "Loser"),
          alpha = 0.5,arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "Status", x = "Asexual & sexual iRBC", y = "RBC") +
  scale_color_manual(values=c("Winner" = "#4575b4","Loser"= "#fc8d59"))  +
  theme_bw() + 
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1))
```


#---------co-infection invasion ---------------#
# get invasion dynamic
```{r}
# get invasion df
invasion_fitness.df <- read.csv(here("data/ci_invasion.csv"))

# get cue range
ci_cue_range <- read.csv(here("data/cue_range_ci.csv"))
invasion_fitness.df2 <- invasion_fitness.df %>% 
  left_join(select(ci_cue_range, id, mut_cue = cue, mut_low = low, mut_high = high, mut_by = by), by = c("mut_id"= "id")) %>% 
  left_join(select(ci_cue_range, id, res_cue = cue, res_low = low, res_high = high, res_by = by), by = c("res_id"= "id"))
```

# function to get dynamic
```{r}
get_invasion_dyn <- function(df){
  # get cues
  mut_cue <- df$mut_cue
  res_cue <- df$res_cue
  
  # get info of cues (for co infection)
  if(stringr::str_detect(mut_cue, "-i")){mut_cue = gsub("*-i", "1", mut_cue)}
  if(stringr::str_detect(mut_cue, "-i", negate = T)){mut_cue = mut_cue}
  if(stringr::str_detect(res_cue, "-i")){res_cue = gsub("*-i", "2", res_cue)}
  if(stringr::str_detect(res_cue, "-i", negate = T)){res_cue = res_cue}
  
  # get log
  mut_log <- ifelse(stringr::str_detect(df$mut_id, "log"), "log10", "none")
  res_log <- ifelse(stringr::str_detect(df$res_id, "log"), "log10", "none")
  
  # get parameters
  mut_par <- c(df$mut_var1_opt, df$mut_var2_opt, df$mut_var3_opt, df$mut_var4_opt)
  res_par <- c(df$res_var1, df$res_var2, df$res_var3, df$res_var4)
  
  # get cue range
  mut_cue_range <- seq(df$mut_low, df$mut_high, by = df$mut_by)
  res_cue_range <- seq(df$res_low, df$res_high, by = df$res_by)
  
  # get dynamics of co infection
  ci_dyn <- chabaudi_ci_clean(
    parameters_cr_1 = mut_par,
    parameters_cr_2 = res_par,
                  immunity = "tsukushi",
                  parameters = parameters_tsukushi,
                  cue_1 = mut_cue,
                  cue_2 = res_cue,
                  cue_range_1 = mut_cue_range,
                 cue_range_2 = res_cue_range,
                log_cue_1 = mut_log,
                log_cue_2 = res_log,
                solver = "vode",
                time_range = seq(0, 30, 0.001),
    dyn = T)
  
  # append label to all df
 ci_dyn2 <- cbind(ci_dyn, mut_id = df$mut_id, res_id = df$res_id)
  
  # write
 write_parquet(ci_dyn2, paste0(here("data/ci_invasion_dyn/"), df$mut_id, "-", df$res_id, ".parquet"))
}
```


# run dynamic funciton
```{r}
# get function and parameters
source(here("functions/chabaudi_ci_clean.R"))
parameters_tsukushi <- c(R1 = 8.89*(10^6), # slightly higher
                lambda = 3.7*(10^5),
                mu = 0.025, 
                p = 8*(10^-6), # doubled form original
                alpha = 1, 
                alphag = 2, 
                beta = 5.721, 
                mum = 48, 
                mug = 4, 
                I0 = 43.85965, 
                Ig0 = 0, 
                a = 150, 
                b = 100, 
                sp = 1,
                psin = 16.69234,
                psiw = 0.8431785,
                phin = 0.03520591, 
                phiw = 550.842,
                iota = 2.18*(10^6),
                rho = 0.2627156)
# split
invasion.ls <- split(invasion_fitness.df2, seq(nrow(invasion_fitness.df2)))

# run function
mclapply(invasion.ls, get_invasion_dyn, mc.cores = 4)
```


# process data
```{r}
# import in invasion dynamics
invasion_dyn.ls <- list.files(path = here("data/ci_invasion_dyn"), pattern = "*.parquet", full.names = T)
invasion_dyn.ls <- lapply(invasion_dyn.ls, read_parquet)

# filter and so on
invasion_dyn.ls2 <- mclapply(invasion_dyn.ls[167:177], mc.cores = 4, function(x){
  x2 <- x %>% 
    filter(variable == "I1" | variable == "Ig1" | variable == "I2" | variable == "Ig2" | variable == "R") %>% 
    mutate(id_alt = paste(mut_id, res_id)) %>% 
    select(id_alt, time, variable, value) %>% 
    tidyr::pivot_wider(names_from = variable, values_from = value, id_cols = c(time, id_alt)) %>%
  mutate(total1 = I1+Ig1, total2 = I2+Ig2)
  
  write_parquet(x2, paste0(here("data/disease_curve/ci_invasion/"), unique(x2$id_alt), "_dc.parquet"))
})

# fetch data
invasion_dyn.ls2 <- list.files(path = here("data/disease_curve/ci_invasion"), pattern = "*.parquet", full.names = T)
invasion_dyn.ls2 <- lapply(invasion_dyn.ls2, read_parquet)
invasion_dc.df <- do.call(rbind, invasion_dyn.ls2)
invasion_dc.df <- invasion_dc.df %>% 
  mutate(mut_id = gsub(" .*", "", id_alt),
         res_id = gsub(".* ", "", id_alt)) %>% 
  filter(mut_id != res_id)
#write_parquet(invasion_dc.df, here("data/disease_curve/invasion_dc.parquet"))
```

# further processing
```{r}
invasion_dc.df <- read_parquet(here("data/disease_curve/invasion_dc.parquet"))
# get winners and losers
invasion_fitness.df <- read.csv(here("data/ci_invasion.csv"))
invasion_dc.df2 <- invasion_dc.df %>% 
  left_join(invasion_fitness.df, by = c("mut_id", "res_id")) %>% 
  mutate(
  total_winner = case_when(
    fitness> 0 ~ total1,
    fitness< 0 ~ total2
  ),
  total_loser = case_when(
    fitness > 0 ~ total2,
    fitness < 0 ~ total1
  )) %>% 
  filter(abs(fitness) > 0.5)
```

# plot
```{r}
invasion_dc.pl <- ggplot() +
  geom_path(data = invasion_dc.df2, aes(x= total_winner, y = R, group = id_alt, color = "Winner"), alpha = 0.5, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  geom_path(data = invasion_dc.df2, aes(x= total_loser, y = R, group = id_alt, color = "Loser"),
          alpha = 0.5,arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "Status", x = "Asexual & sexual iRBC", y = "RBC") +
  scale_color_manual(values=c("Winner" = "#4575b4","Loser"= "#fc8d59"))  +
  theme_bw() + 
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) %>% 
  theme(legend.position = "none")
```


#--------- quantifying disease curve area ------------#
# function to calculate area between sets of points -> from https://stackoverflow.com/questions/3672260/area-covered-by-a-point-cloud-with-r
```{r}
library(splancs)
cha<-function(df){
  x <- df$total
  y <- df$R
chull(x,y)->i
return(areapl(cbind(x[i],y[i])))
}
```

# loop to get area: single infection
```{r}
# split df
si_dc_high.ls <- split(si_dc.high, si_dc.high$ez_label_si)
si_dc_poor.ls <- split(si_dc.poor, si_dc.poor$id)

# get area
si_dc_high.area <- cbind.data.frame(area = as.numeric(lapply(si_dc_high.ls, cha)), id_alt = names(lapply(si_dc_high.ls, cha)))
si_dc_poor.area <- cbind.data.frame(area = as.numeric(lapply(si_dc_poor.ls, cha)), id_alt = names(lapply(si_dc_poor.ls, cha)))


# join with fitness
si_fitness.df <- si_fitness.df %>% left_join(ez_label, by = c("id" = "id_si"))

si_dc_high.area2 <- si_dc_high.area %>% 
  left_join(si_fitness.df, by = c("id_alt" = "ez_label_si")) %>% 
  select(value, area) %>% 
  mutate(condition = "Single infection")

si_dc_poor.area2 <- si_dc_poor.area %>% 
  left_join(si_fitness.df, by = c("id_alt" = "id")) %>% 
  select(value, area) %>% 
  mutate(condition = "Single infection")
```

# coinfection
```{r}
# split
ci_dc_high.ls <- split(ci_dc.high2, ci_dc.high2$ez_label)
ci_dc_poor.ls <- split(ci_dc.poor, ci_dc.poor$label)

# run function to find area
ci_dc_high.area <- cbind.data.frame(area = as.numeric(lapply(ci_dc_high.ls, cha)), id_alt = names(lapply(ci_dc_high.ls, cha)), value = unique(ci_dc.high2$value))

ci_dc_poor.area <- cbind.data.frame(area = as.numeric(lapply(ci_dc_poor.ls, cha)), id_alt = names(lapply(ci_dc_poor.ls, cha)), value = unique(ci_dc.poor$value))

# edit and join
ci_dc_high.area2 <-  ci_dc_high.area %>% 
  select(area, value) %>% 
  mutate(condition = "Co-infection")

ci_dc_poor.area2 <-  ci_dc_poor.area %>% 
  select(area, value) %>% 
  mutate(condition = "Co-infection")
```

# dual cue
```{r}
# split
dual.dc <- read_parquet(here("data/disease_curve/dual_dc.parquet"))
dual_dc.ls <- split(dual.dc, dual.dc$label_alt)

# get area
dual_dc.area <- cbind.data.frame(area = as.numeric(lapply(dual_dc.ls, cha)), id_alt = names(lapply(dual_dc.ls, cha)))

# bind with fitness
dual_fitness.df <- dual_fitness.df %>% mutate(id_alt = paste(label, "+", label_b))
dual_dc.area2 <- dual_dc.area %>% 
  left_join(dual_fitness.df, by = "id_alt") %>% 
  select(area, value) %>% 
  mutate(condition = "Dual-cue") %>% 
  filter(value > 2)
dual_dc.area2
```

#------ get fitted scatter plot for all single infection, co infection, and dual cue --------#
```{r}
# rbind
si.area <- rbind(si_dc_high.area2, si_dc_poor.area2)
ci.area <- rbind(ci_dc_high.area2, ci_dc_poor.area2)
dual.area <- dual_dc.area2

# plot
library("ggpmisc")

si_area.pl <- ggplot(data = si.area, aes(x = area, y = value)) +
  geom_point() +
  stat_poly_line(color = "black") +
  stat_poly_eq() +
  labs(x = "Area", y = "Fitness", color = "Status") +
  theme_bw()

ci_area.pl <- ggplot(data = ci.area, aes(x = area, y = value)) +
  geom_point() +
  stat_poly_line(color = "black") +
  stat_poly_eq() +
  labs(x = "Area", y = "Fitness", color = "Status") +
  theme_bw()

dual_area.pl <- ggplot(data = dual.area, aes(x = area, y = value)) +
  geom_point() +
  stat_poly_line(color= "black") +
  stat_poly_eq() +
  labs(x = "Area", y = "Fitness", color = "Status") +
  theme_bw()
```


#------- plot together with disease curve --------#
```{r}
# single infection
si_vir.pl <- ggarrange(si_dc.pl, si_area.pl, align = "v", widths = c(1, 0.45))

# co-infection
ci_vir.pl <- ggarrange(ci_dc.pl, ci_area.pl, align = "v", widths = c(1, 0.45))

# dual-cue
dual_vir.pl <- ggarrange(dual_dc.pl, dual_area.pl, align = "v", widths = c(1, 0.45))
```


#--------- static area comparison -------------#
# compute area
```{r}
# import in dc dynamic and fitness
static_dc.df <- read_parquet(here("data/disease_curve/static_dc.parquet"))
static_fitness.df <- read.csv(here("data/ci_static.csv"))

# get winner and loser
static_dc.df4 <- static_dc.df %>% 
  left_join(select(static_fitness.df, id_1, id_2, fitness_difference), by = c("id_1", "id_2")) %>%
  filter(id_1 != id_2) %>% 
  mutate(
  total_winner = case_when(
    fitness_difference > 0 ~ total1,
    fitness_difference< 0 ~ total2
  ),
  total_loser = case_when(
    fitness_difference > 0 ~ total2,
    fitness_difference< 0 ~ total1
  ))%>% 
  filter(abs(fitness_difference) > 0.5)

# split by winner and loser
static_dc.ls1 <- split(select(static_dc.df4, R, total = total_winner), static_dc.df4$id_alt)
static_dc.ls2 <- split(select(static_dc.df4, R, total = total_loser), static_dc.df4$id_alt)

# get area
static_win.area <- cbind.data.frame(area = as.numeric(lapply(static_dc.ls1, cha)), status = "Winner")
static_loser.area <- cbind.data.frame(area = as.numeric(lapply(static_dc.ls2, cha)), status = "Loser")

# pair
static.area <- cbind(select(static_win.area, Winner = area),
                     select(static_loser.area, Loser = area)) %>% 
  mutate(classification = ifelse(Winner>Loser, "Winner larger area", "Loser larger area"))
```

# plot static
```{r}
static_area.pl <- ggpaired(static.area, cond1 = "Winner", cond2 = "Loser", line.color = "classification", alpha = 0.2) +
  labs(x = "Status", y = "Area", color = "Comparison\n(Static)") +
  scale_color_manual(values = c("Loser larger area" = "#fc8d59", "Winner larger area" = "#4575b4")) +
  stat_compare_means(paired = TRUE, hjust = 0) +
  guides(color=guide_legend(nrow=2,byrow=TRUE))
```


#--------- invasion area comparison -----------------#
# get area
```{r}
# import in dc dynamic and fitness
invasion_dc.df <- read_parquet(here("data/disease_curve/invasion_dc.parquet"))
invasion_fitness.df <- read.csv(here("data/ci_invasion.csv"))

invasion_dc.df4 <- invasion_dc.df %>% 
  left_join(invasion_fitness.df, by = c("mut_id", "res_id")) %>% 
  mutate(
  total_winner = case_when(
    fitness> 0 ~ total1,
    fitness< 0 ~ total2
  ),
  total_loser = case_when(
    fitness > 0 ~ total2,
    fitness < 0 ~ total1
  )) %>% 
  filter(abs(fitness) > 0.5)

# split by winner and loser
invasion_dc.ls1 <- split(select(invasion_dc.df4, R, total = total_winner), invasion_dc.df4$id_alt)
invasion_dc.ls2 <- split(select(invasion_dc.df4, R, total = total_loser), invasion_dc.df4$id_alt)

# get area
invasion_win.area <- cbind.data.frame(area = as.numeric(lapply(invasion_dc.ls1, cha)), status = "Winner")
invasion_loser.area <- cbind.data.frame(area = as.numeric(lapply(invasion_dc.ls2, cha)), status = "Loser")

# pair
invasion.area <- cbind(select(invasion_win.area, Winner = area),
                     select(invasion_loser.area, Loser = area)) %>% 
  mutate(classification = ifelse(Winner>Loser, "Winner larger area", "Loser larger area"))
```

# plot
```{r}
invasion_area.pl <-ggpaired(invasion.area, cond1 = "Winner", cond2 = "Loser", line.color = "classification", alpha = 0.2) +
  labs(x = "Status", y = "Area", color = "Comparison\n(Invasive)") +
  scale_color_manual(values = c("Loser larger area" = "#fc8d59", "Winner larger area" = "#4575b4")) +
  stat_compare_means(paired = TRUE, hjust = 0) +
  guides(color=guide_legend(nrow=2,byrow=TRUE))
  
```

#------ plot together -------------#
```{r}
# pairwise comparison for static and invasive comeptition
heterocue_comp.pl <- ggarrange(static_area.pl, invasion_area.pl, ncol = 2, nrow = 1, align = "v")

# join inthe other disease curves
ggarrange(si_vir.pl, ci_vir.pl, dual_vir.pl, heterocue_comp.pl, ncol = 2, nrow = 2, labels = c("A", "B", "C", "D"), heights = c(1, 0.8))

ggsave(here("figures/plos-bio/virulence.tiff"), units = "px", width = 2250, height = 1600, scale = 2, dpi=300, bg = "white")
```


#====================================#
# dual cue dynamics figure
#===================================#

# get dual dynamics
```{r}
dual.dyn <- chabaudi_si_clean(
  parameters_cr = c(4.446192033,	10.97518275,	1.38762817,	23.3059254,	-3.452052371,	-18.0070692,	39.66614226,	-3.545193141,	18.78350799),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 30, by = 1e-3),
  cue_range =  seq(6, 7, by = 1/500),
  cue_range_b = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/500),
  cue = "R",
  cue_b = "I",
  log_cue = "log10",
  log_cue_b = "log10",
  solver = "vode",
  dyn = T
)

# filter out relevent dataframes
dual.dyn_f <- dual.dyn %>% 
  filter(variable %in% c("I", "Ig", "G", "R", "N", "W"))

# cr only
dual.dyn_cr <- dual.dyn %>% filter(variable == "cr")
```

# plot
```{r}
dual_I.plt <- ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "I"), aes(x = time, y = value/(10^5)),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "I" & row_number() %% 1000 == 0), 
             aes(x = time, y = value/(10^5)), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*10^5, name="Asexual iRBC per µL")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_Ig.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "Ig"), aes(x = time, y = value/(10^5)),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "Ig" & row_number() %% 1000 == 0), 
             aes(x = time, y = value/(10^5)), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*10^5, name="Sexual iRBC per µL")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_G.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "G"), aes(x = time, y = value/(10^4)),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "G" & row_number() %% 1000 == 0), 
             aes(x = time, y = value/(10^4)), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*10^4, name="Gametocyte per µL")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_R.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "R"), aes(x = time, y = value/(10^7)),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "R" & row_number() %% 1000 == 0), 
             aes(x = time, y = value/(10^7)), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*10^7, name="RBC per µL")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_N.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "N"), aes(x = time, y = value*10),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "N" & row_number() %% 1000 == 0), 
             aes(x = time, y = value*10), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*0.1, name="Indiscriminate immunity")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_W.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "W"), aes(x = time, y = value*2),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "W" & row_number() %% 1000 == 0), 
             aes(x = time, y = value*2), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*0.5, name="Targeted immunity")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")
```

# plot together
```{r}
ggarrange(dual_I.plt, dual_Ig.plt, dual_G.plt, dual_R.plt, dual_N.plt, dual_W.plt, nrow = 3, ncol = 2, align = "hv", labels = c("A", "B", "C", "D", "E", "F"))
ggsave(here("figures/plos-bio/dual_dyn.tiff"), units = "px", width = 2250, height = 2000, scale = 1, dpi=300, bg = "white")
```

#======================================#
# Single and co-infection verification
#======================================#
# single infection
```{r}
# import in all single infection data
si_val.ls <- list.files(path = here("data/si_validation"), pattern = "*.csv", full.names = T)

si_val.df <- lapply(si_val.ls, read.csv)
si_val.df <- do.call(rbind, si_val.df)

# get max fitness from simulation. left join with si_opt
si_opt.df <- read.csv(here("data/si_opt.csv"))

# we can see that all of the randomly simulated models have a fitness value that is less than the optimized model
si_val.df2 <- select(si_val.df, V1, id) %>%
  left_join(si_opt.df, by =c("id" = "id")) %>% 
  mutate(fitness_difference = fitness_20 - V1) %>% 
  left_join(select(ez_label, id_si, ez_label_si), by = c("id" = "id_si"))
```

### Model validation
```{r}
# read in the results file
ci_val.ls <- list.files(path = here("data/ci_validation"), pattern = "*.csv", full.names = T)
ci_val.ls2 <- lapply(ci_val.ls, read.csv)
ci_val.df <- do.call(rbind, ci_val.ls2)

ci_val.df2
ci_val.df2 <- ci_val.df %>% 
  left_join(select(ez_label, label_ci, ez_label), by = c("label" = "label_ci"))
```

# plot
```{r}
si_val.plt <- ggplot(data = si_val.df2, aes(x = fitness_difference)) +
  geom_histogram(bins = 50) +
  geom_vline(xintercept = 0, color = "#fc8d59") +
  facet_wrap(~ez_label_si, scales = "free", ncol = 3) +
  labs(x = "Optimized fitness - random fitness", y = "Frequency") +
  theme_bw()

ci_val.plt <- ggplot(data = ci_val.df2, aes(x = V1)) +
  geom_histogram(bins = 50) +
  geom_vline(xintercept = 0, color = "#fc8d59") +
  facet_wrap(~ez_label, scales = "free", ncol = 4) +
  labs(x = "Fitness difference between\noptimized and random strain", y = "Frequency") +
  theme_bw()

ggarrange(si_val.plt, ci_val.plt, align = "hv", labels = c("A", "B"), widths = c(3,4))
ggsave(here("figures/plos-bio/validation.tiff"), units = "px", width = 2250, height = 1300, scale = 1.6, dpi=300, bg = "white")
```

#=========================#
# Monte carlo dynamics supplementary
#=========================#
# run code in report 16
```{r}
mc_dyn_a <- ggplot() +
  geom_line(data = reference.df, aes(x = time, y = cr)) +
  geom_ribbon(data = diff.df, aes(x = time, ymin = cr_bot, ymax = cr_top), alpha = 0.5, fill = "#fc8d59") +
  facet_wrap(~cue, ncol = 2) +
  labs(x = "Time (days)", y = "Conversion rate") +
  theme_bw()
  
# plot fitness timeseries. When if tiness lost? At the latter part
mc_dyn_b  <- ggplot() +
  geom_line(data = reference.df, aes(x = time, y = tau)) +
  geom_ribbon(data = diff.df, aes(x = time, ymin = tau_bot, ymax = tau_top), alpha = 0.5, fill = "#fc8d59") +
  facet_wrap(~cue, ncol = 2) +
  labs(x = "Time (days)", y = "Transmission potential") +
  theme_bw()

ggarrange(mc_dyn_a, mc_dyn_b, ncol = 1, align = "v", labels = c("A", "B"))
ggsave(here("figures/plos-bio/MC_dyn.tiff"), units = "px", width = 2000, height = 2600, scale = 1, dpi=300, bg = "white")
```



